2015-07-07 21:43:32 +00:00
|
|
|
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
|
|
|
|
// for details. All rights reserved. Use of this source code is governed by a
|
|
|
|
// BSD-style license that can be found in the LICENSE file.
|
|
|
|
|
2017-09-04 11:13:21 +00:00
|
|
|
#if !defined(DART_PRECOMPILED_RUNTIME)
|
|
|
|
|
2015-07-07 21:43:32 +00:00
|
|
|
#include "vm/regexp_assembler_ir.h"
|
|
|
|
|
2019-04-22 20:15:43 +00:00
|
|
|
#include "platform/unicode.h"
|
2015-07-07 21:43:32 +00:00
|
|
|
#include "vm/bit_vector.h"
|
2017-09-04 14:18:37 +00:00
|
|
|
#include "vm/compiler/backend/il_printer.h"
|
|
|
|
#include "vm/compiler/frontend/flow_graph_builder.h"
|
|
|
|
#include "vm/compiler/jit/compiler.h"
|
2015-07-07 21:43:32 +00:00
|
|
|
#include "vm/dart_entry.h"
|
2018-12-05 18:57:02 +00:00
|
|
|
#include "vm/longjump.h"
|
2015-07-07 21:43:32 +00:00
|
|
|
#include "vm/object_store.h"
|
|
|
|
#include "vm/regexp.h"
|
|
|
|
#include "vm/resolver.h"
|
2015-09-02 21:58:26 +00:00
|
|
|
#include "vm/runtime_entry.h"
|
2015-07-07 21:43:32 +00:00
|
|
|
#include "vm/stack_frame.h"
|
|
|
|
|
|
|
|
#define Z zone()
|
|
|
|
|
|
|
|
// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
//
// The body is wrapped in do { ... } while (0) so that `TAG();` expands to
// exactly one statement and remains correct inside an unbraced if/else.
#define TAG()                                                                  \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      TAG_();                                                                  \
    }                                                                          \
  } while (0)
|
|
|
|
// Unconditional helper behind TAG(): binds a constant instruction holding the
// string "TAG: " concatenated with __FUNCTION__ (allocated in Heap::kOld) and
// passes it to Print(). Note that the expansion already ends in a semicolon.
#define TAG_()                                                                 \
  Print(Bind(new (Z) ConstantInstr(String::ZoneHandle(                         \
      Z, String::Concat(String::Handle(String::New("TAG: ")),                  \
                        String::Handle(String::New(__FUNCTION__)),             \
                        Heap::kOld)))));
|
2016-11-08 21:54:47 +00:00
|
|
|
|
|
|
|
// Calls Print(arg) only when irregexp tracing is enabled; `arg` is not
// evaluated otherwise.
//
// Wrapped in do { ... } while (0) so that `PRINT(x);` is a single statement
// and can be used safely as the body of an unbraced if/else.
#define PRINT(arg)                                                             \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      Print(arg);                                                              \
    }                                                                          \
  } while (0)
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
namespace dart {
|
|
|
|
|
|
|
|
// Minimum size of the backtracking stack. The constructor divides this by 4
// to get the element count of the initial Int32 backing array (presumably the
// value is in bytes -- TODO confirm).
static constexpr intptr_t kMinStackSize = 512;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This assembler uses the following main local variables:
|
|
|
|
* - stack_: A pointer to a growable list which we use as an all-purpose stack
|
|
|
|
* storing backtracking offsets, positions & stored register values.
|
|
|
|
* - current_character_: Stores the currently loaded characters (possibly more
|
|
|
|
* than one).
|
|
|
|
* - current_position_: The current position within the string, stored as a
|
|
|
|
* negative offset from the end of the string (i.e. the
|
|
|
|
* position corresponding to str[0] is -str.length).
|
|
|
|
* Note that current_position_ is *not* byte-based, unlike
|
|
|
|
* original V8 code.
|
|
|
|
*
|
|
|
|
* Results are returned through an array of capture indices, stored at
|
|
|
|
* matches_param_. A null array specifies a failure to match. The match indices
|
|
|
|
* [start_inclusive, end_exclusive] for capture group i are stored at positions
|
|
|
|
* matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
|
|
|
|
* indices of -1 denote non-matched groups. Note that we store these indices
|
|
|
|
* as a negative offset from the end of the string in registers_array_
|
|
|
|
* during processing, and convert them to standard indexes when copying them
|
|
|
|
* to matches_param_ on successful match.
|
|
|
|
*/
|
|
|
|
// Sets up the assembler for one specialization of a regexp matcher.
//
// specialization_cid selects the string representation (one-byte or two-byte,
// internal or external); any other class id is a programming error.
// capture_count determines how many registers are saved: two per capture
// plus two more. The constructor allocates the local variables, the initial
// stack backing, and all preset blocks (graph entry, function entry, start,
// success, backtrack, exit), generates the entry/success/exit blocks, and
// leaves the emission cursor at the start block.
IRRegExpMacroAssembler::IRRegExpMacroAssembler(
    intptr_t specialization_cid,
    intptr_t capture_count,
    const ParsedFunction* parsed_function,
    const ZoneGrowableArray<const ICData*>& ic_data_array,
    intptr_t osr_id,
    Zone* zone)
    : RegExpMacroAssembler(zone),
      thread_(Thread::Current()),
      specialization_cid_(specialization_cid),
      parsed_function_(parsed_function),
      ic_data_array_(ic_data_array),
      current_instruction_(nullptr),
      stack_(nullptr),
      stack_pointer_(nullptr),
      current_character_(nullptr),
      current_position_(nullptr),
      string_param_(nullptr),
      string_param_length_(nullptr),
      start_index_param_(nullptr),
      registers_count_(0),
      saved_registers_count_((capture_count + 1) * 2),
      stack_array_cell_(Array::ZoneHandle(zone, Array::New(1, Heap::kOld))),
      // The registers array is only allocated once assembly is finished and
      // the final register count is known; see FinalizeRegistersArray().
      registers_array_(TypedData::ZoneHandle(zone, TypedData::null())),
      // Block id 0 belongs to the GraphEntry, so allocation starts at 1.
      block_id_(1) {
  // Pick the character mode from the specialized string class.
  if (specialization_cid == kOneByteStringCid ||
      specialization_cid == kExternalOneByteStringCid) {
    mode_ = ASCII;
  } else if (specialization_cid == kTwoByteStringCid ||
             specialization_cid == kExternalTwoByteStringCid) {
    mode_ = UC16;
  } else {
    UNREACHABLE();
  }

  InitializeLocals();

  // The stack backing lives in slot 0 of a one-element cell array. Going
  // through the cell lets later matches reuse the backing even after it has
  // been grown and therefore reallocated.
  const TypedData& initial_stack = TypedData::Handle(
      zone,
      TypedData::New(kTypedDataInt32ArrayCid, kMinStackSize / 4, Heap::kOld));
  stack_array_cell_.SetAt(0, initial_stack);

  // Create all preset blocks.
  entry_block_ = new (zone) GraphEntryInstr(*parsed_function_, osr_id);

  auto function_entry = new (zone) FunctionEntryInstr(
      entry_block_, block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
  entry_block_->set_normal_entry(function_entry);

  const auto new_join_block = [this, zone]() {
    return new (zone)
        JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
  };
  start_block_ = new_join_block();
  success_block_ = new_join_block();
  backtrack_block_ = new_join_block();
  exit_block_ = new_join_block();

  // Fill in the blocks whose contents are known up front.
  GenerateEntryBlock();
  GenerateSuccessBlock();
  GenerateExitBlock();

  blocks_.Add(entry_block_);
  blocks_.Add(entry_block_->normal_entry());
  blocks_.Add(start_block_);
  blocks_.Add(success_block_);
  blocks_.Add(backtrack_block_);
  blocks_.Add(exit_block_);

  // Subsequent emission starts at the start block.
  set_current_instruction(start_block_);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Nothing to release explicitly here.
IRRegExpMacroAssembler::~IRRegExpMacroAssembler() = default;
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::InitializeLocals() {
|
|
|
|
// All generated functions are expected to have a current-context variable.
|
|
|
|
// This variable is unused in irregexp functions.
|
2018-06-07 11:35:58 +00:00
|
|
|
parsed_function_->current_context_var()->set_index(
|
2018-06-11 09:55:54 +00:00
|
|
|
VariableIndex(GetNextLocalIndex()));
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Create local variables and parameters.
|
|
|
|
stack_ = Local(Symbols::stack());
|
|
|
|
stack_pointer_ = Local(Symbols::stack_pointer());
|
|
|
|
registers_ = Local(Symbols::position_registers());
|
|
|
|
current_character_ = Local(Symbols::current_character());
|
|
|
|
current_position_ = Local(Symbols::current_position());
|
|
|
|
string_param_length_ = Local(Symbols::string_param_length());
|
|
|
|
capture_length_ = Local(Symbols::capture_length());
|
|
|
|
match_start_index_ = Local(Symbols::match_start_index());
|
|
|
|
capture_start_index_ = Local(Symbols::capture_start_index());
|
|
|
|
match_end_index_ = Local(Symbols::match_end_index());
|
|
|
|
char_in_capture_ = Local(Symbols::char_in_capture());
|
|
|
|
char_in_match_ = Local(Symbols::char_in_match());
|
|
|
|
index_temp_ = Local(Symbols::index_temp());
|
2020-11-23 07:51:05 +00:00
|
|
|
result_ = Local(Symbols::c_result());
|
2015-07-07 21:43:32 +00:00
|
|
|
|
2015-09-04 08:06:02 +00:00
|
|
|
string_param_ = Parameter(Symbols::string_param(),
|
|
|
|
RegExpMacroAssembler::kParamStringIndex);
|
|
|
|
start_index_param_ = Parameter(Symbols::start_index_param(),
|
|
|
|
RegExpMacroAssembler::kParamStartOffsetIndex);
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::GenerateEntryBlock() {
|
|
|
|
set_current_instruction(entry_block_->normal_entry());
|
|
|
|
TAG();
|
|
|
|
|
|
|
|
// Store string.length.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* string_push = PushLocal(string_param_);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
StoreLocal(string_param_length_,
|
|
|
|
Bind(InstanceCall(InstanceCallDescriptor(String::ZoneHandle(
|
|
|
|
Field::GetterSymbol(Symbols::Length()))),
|
|
|
|
string_push)));
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Store (start_index - string.length) as the current position (since it's a
|
|
|
|
// negative offset from the end of the string).
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* start_index_push = PushLocal(start_index_param_);
|
|
|
|
Value* length_push = PushLocal(string_param_length_);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
|
|
|
|
|
|
|
|
// Generate a local list variable to represent "registers" and
|
|
|
|
// initialize capture registers (others remain garbage).
|
2016-11-08 21:54:47 +00:00
|
|
|
StoreLocal(registers_, Bind(new (Z) ConstantInstr(registers_array_)));
|
2015-07-07 21:43:32 +00:00
|
|
|
ClearRegisters(0, saved_registers_count_ - 1);
|
|
|
|
|
|
|
|
// Generate a local list variable to represent the backtracking stack.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* stack_cell_push = Bind(new (Z) ConstantInstr(stack_array_cell_));
|
2016-11-08 21:54:47 +00:00
|
|
|
StoreLocal(stack_,
|
|
|
|
Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
|
2020-01-09 01:37:27 +00:00
|
|
|
stack_cell_push, Bind(Uint64Constant(0)))));
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(stack_pointer_, Bind(Int64Constant(-1)));
|
|
|
|
|
|
|
|
// Jump to the start block.
|
|
|
|
current_instruction_->Goto(start_block_);
|
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
|
|
|
|
set_current_instruction(backtrack_block_);
|
|
|
|
TAG();
|
2017-06-23 10:51:52 +00:00
|
|
|
CheckPreemption(/*is_backtrack=*/true);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
const intptr_t entries_count = entry_block_->indirect_entries().length();
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* block_id_push = Bind(PopStack());
|
2021-06-23 08:51:44 +00:00
|
|
|
backtrack_goto_ = new (Z) IndirectGotoInstr(entries_count, block_id_push);
|
2015-07-07 21:43:32 +00:00
|
|
|
CloseBlockWith(backtrack_goto_);
|
|
|
|
|
|
|
|
// Add an edge from the "indirect" goto to each of the targets.
|
|
|
|
for (intptr_t j = 0; j < entries_count; j++) {
|
|
|
|
backtrack_goto_->AddSuccessor(
|
|
|
|
TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::GenerateSuccessBlock() {
|
|
|
|
set_current_instruction(success_block_);
|
|
|
|
TAG();
|
|
|
|
|
2017-11-14 18:13:04 +00:00
|
|
|
Value* type = Bind(new (Z) ConstantInstr(TypeArguments::ZoneHandle(
|
2021-01-06 15:22:11 +00:00
|
|
|
Z, IsolateGroup::Current()->object_store()->type_argument_int())));
|
2015-07-07 21:43:32 +00:00
|
|
|
Value* length = Bind(Uint64Constant(saved_registers_count_));
|
[vm/compiler] Copy inlined IDs with token positions.
When creating new instructions that inherit a token position that
represents a source location from another instruction, the inheriting
instruction must also have the same inlining ID in order for the source
position represented by the token position to be looked up in the
correct script.
Force this by wrapping both in a single InstructionSource struct which
is taken by instructions which take token positions instead of just a
token position. That way, it's more work to manually transfer the token
position separately from the inlining ID of the instruction than doing
the right thing of transfering both at once.
To ensure this information is kept consistent, we pass InstructionSource
structs through the FlowGraphCompiler all the way down to the
CodeSourceMapBuilder.
This CL also makes the following changes:
* Cache the upper bound of source positions in scripts and use it to add
a check for if a given real token position is valid for the script
without iterating over the line starts data for each token position.
* Start inlining intervals appropriately when adding descriptor and
null check information to the code source map.
Code size changes are minimal on Flutter gallery in release mode
(<0.05% decrease).
TEST=Existing tests on trybots, with manual checking with
--check-token-positions that previous errors are now removed.
Bug: https://github.com/dart-lang/sdk/issues/44436
Cq-Include-Trybots: luci.dart.try:vm-kernel-precomp-nnbd-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-kernel-nnbd-linux-debug-x64-try,vm-kernel-linux-debug-x64-try,vm-kernel-linux-release-x64-try,vm-kernel-nnbd-linux-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-kernel-precomp-nnbd-linux-release-x64-try,vm-kernel-linux-product-x64-try,vm-kernel-precomp-linux-product-x64-try,vm-kernel-precomp-linux-debug-simarm_x64-try,vm-kernel-precomp-linux-release-simarm64-try,vm-kernel-linux-release-simarm64-try,vm-kernel-linux-release-simarm-try,vm-kernel-linux-release-ia32-try
Change-Id: I23ced262cb4e9fe9d81356f409e7e8d220d63ee0
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/173967
Reviewed-by: Régis Crelier <regis@google.com>
2020-12-18 00:42:14 +00:00
|
|
|
Value* array = Bind(new (Z) CreateArrayInstr(InstructionSource(), type,
|
2017-05-25 17:12:19 +00:00
|
|
|
length, GetNextDeoptId()));
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(result_, array);
|
|
|
|
|
|
|
|
// Store captured offsets in the `matches` parameter.
|
|
|
|
for (intptr_t i = 0; i < saved_registers_count_; i++) {
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* matches_push = PushLocal(result_);
|
|
|
|
Value* index_push = Bind(Uint64Constant(i));
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Convert negative offsets from the end of the string to string indices.
|
|
|
|
// TODO(zerny): use positive offsets from the get-go.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* offset_push = LoadRegister(i);
|
|
|
|
Value* len_push = PushLocal(string_param_length_);
|
|
|
|
Value* value_push = Bind(Add(offset_push, len_push));
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
|
2016-11-08 21:54:47 +00:00
|
|
|
matches_push, index_push, value_push));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Print the result if tracing.
|
|
|
|
PRINT(PushLocal(result_));
|
|
|
|
|
|
|
|
// Return true on success.
|
2017-05-25 17:12:19 +00:00
|
|
|
AppendInstruction(new (Z) ReturnInstr(
|
[vm/compiler] Copy inlined IDs with token positions.
When creating new instructions that inherit a token position that
represents a source location from another instruction, the inheriting
instruction must also have the same inlining ID in order for the source
position represented by the token position to be looked up in the
correct script.
Force this by wrapping both in a single InstructionSource struct which
is taken by instructions which take token positions instead of just a
token position. That way, it's more work to manually transfer the token
position separately from the inlining ID of the instruction than doing
the right thing of transfering both at once.
To ensure this information is kept consistent, we pass InstructionSource
structs through the FlowGraphCompiler all the way down to the
CodeSourceMapBuilder.
This CL also makes the following changes:
* Cache the upper bound of source positions in scripts and use it to add
a check for if a given real token position is valid for the script
without iterating over the line starts data for each token position.
* Start inlining intervals appropriately when adding descriptor and
null check information to the code source map.
Code size changes are minimal on Flutter gallery in release mode
(<0.05% decrease).
TEST=Existing tests on trybots, with manual checking with
--check-token-positions that previous errors are now removed.
Bug: https://github.com/dart-lang/sdk/issues/44436
Cq-Include-Trybots: luci.dart.try:vm-kernel-precomp-nnbd-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-kernel-nnbd-linux-debug-x64-try,vm-kernel-linux-debug-x64-try,vm-kernel-linux-release-x64-try,vm-kernel-nnbd-linux-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-kernel-precomp-nnbd-linux-release-x64-try,vm-kernel-linux-product-x64-try,vm-kernel-precomp-linux-product-x64-try,vm-kernel-precomp-linux-debug-simarm_x64-try,vm-kernel-precomp-linux-release-simarm64-try,vm-kernel-linux-release-simarm64-try,vm-kernel-linux-release-simarm-try,vm-kernel-linux-release-ia32-try
Change-Id: I23ced262cb4e9fe9d81356f409e7e8d220d63ee0
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/173967
Reviewed-by: Régis Crelier <regis@google.com>
2020-12-18 00:42:14 +00:00
|
|
|
InstructionSource(), Bind(LoadLocal(result_)), GetNextDeoptId()));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::GenerateExitBlock() {
|
|
|
|
set_current_instruction(exit_block_);
|
|
|
|
TAG();
|
|
|
|
|
|
|
|
// Return false on failure.
|
2017-05-25 17:12:19 +00:00
|
|
|
AppendInstruction(new (Z) ReturnInstr(
|
[vm/compiler] Copy inlined IDs with token positions.
When creating new instructions that inherit a token position that
represents a source location from another instruction, the inheriting
instruction must also have the same inlining ID in order for the source
position represented by the token position to be looked up in the
correct script.
Force this by wrapping both in a single InstructionSource struct which
is taken by instructions which take token positions instead of just a
token position. That way, it's more work to manually transfer the token
position separately from the inlining ID of the instruction than doing
the right thing of transfering both at once.
To ensure this information is kept consistent, we pass InstructionSource
structs through the FlowGraphCompiler all the way down to the
CodeSourceMapBuilder.
This CL also makes the following changes:
* Cache the upper bound of source positions in scripts and use it to add
a check for if a given real token position is valid for the script
without iterating over the line starts data for each token position.
* Start inlining intervals appropriately when adding descriptor and
null check information to the code source map.
Code size changes are minimal on Flutter gallery in release mode
(<0.05% decrease).
TEST=Existing tests on trybots, with manual checking with
--check-token-positions that previous errors are now removed.
Bug: https://github.com/dart-lang/sdk/issues/44436
Cq-Include-Trybots: luci.dart.try:vm-kernel-precomp-nnbd-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-kernel-nnbd-linux-debug-x64-try,vm-kernel-linux-debug-x64-try,vm-kernel-linux-release-x64-try,vm-kernel-nnbd-linux-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-kernel-precomp-nnbd-linux-release-x64-try,vm-kernel-linux-product-x64-try,vm-kernel-precomp-linux-product-x64-try,vm-kernel-precomp-linux-debug-simarm_x64-try,vm-kernel-precomp-linux-release-simarm64-try,vm-kernel-linux-release-simarm64-try,vm-kernel-linux-release-simarm-try,vm-kernel-linux-release-ia32-try
Change-Id: I23ced262cb4e9fe9d81356f409e7e8d220d63ee0
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/173967
Reviewed-by: Régis Crelier <regis@google.com>
2020-12-18 00:42:14 +00:00
|
|
|
InstructionSource(), Bind(LoadLocal(result_)), GetNextDeoptId()));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::FinalizeRegistersArray() {
|
|
|
|
ASSERT(registers_count_ >= saved_registers_count_);
|
|
|
|
registers_array_ =
|
|
|
|
TypedData::New(kTypedDataInt32ArrayCid, registers_count_, Heap::kOld);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool IRRegExpMacroAssembler::CanReadUnaligned() {
|
2020-06-26 09:39:15 +00:00
|
|
|
return !slow_safe();
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
2020-04-25 05:21:27 +00:00
|
|
|
// Runs the compiled matcher of `regexp` that is specialized for the class of
// `input` (and for `sticky`) starting at `start_offset`.
//
// Returns the array produced by the generated function, or Array::null() if
// it returned null. A language error is rethrown as a compile-time error; any
// other error result is propagated.
ArrayPtr IRRegExpMacroAssembler::Execute(const RegExp& regexp,
                                         const String& input,
                                         const Smi& start_offset,
                                         bool sticky,
                                         Zone* zone) {
  const intptr_t input_cid = input.GetClassId();
  const Function& matcher =
      Function::Handle(regexp.function(input_cid, sticky));
  ASSERT(!matcher.IsNull());

  // Pack (regexp, input, start_offset) into the argument array.
  const Array& call_args =
      Array::Handle(Array::New(RegExpMacroAssembler::kParamCount));
  call_args.SetAt(RegExpMacroAssembler::kParamRegExpIndex, regexp);
  call_args.SetAt(RegExpMacroAssembler::kParamStringIndex, input);
  call_args.SetAt(RegExpMacroAssembler::kParamStartOffsetIndex, start_offset);

  // Invoke the generated code.
  const Object& outcome =
      Object::Handle(zone, DartEntry::InvokeFunction(matcher, call_args));

  // Language errors get their own treatment before the generic error path.
  if (outcome.IsLanguageError()) {
    Exceptions::ThrowCompileTimeError(LanguageError::Cast(outcome));
    UNREACHABLE();
  }
  if (outcome.IsError()) {
    Exceptions::PropagateError(Error::Cast(outcome));
  }

  // A null result is reported to the caller as a null array.
  if (outcome.IsNull()) {
    return Array::null();
  }

  ASSERT(outcome.IsArray());
  return Array::Cast(outcome).ptr();
}
|
|
|
|
|
|
|
|
// Creates a dynamically typed variable with no source position for the
// parameter at position `index`. Its variable index is kParamCount - index.
LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
                                                 intptr_t index) const {
  LocalVariable* parameter =
      new (Z) LocalVariable(TokenPosition::kNoSource, TokenPosition::kNoSource,
                            name, Object::dynamic_type());
  parameter->set_index(VariableIndex(kParamCount - index));
  return parameter;
}
|
|
|
|
|
|
|
|
// Creates a dynamically typed local variable with no source position and
// assigns it the next free local index.
LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
  LocalVariable* variable =
      new (Z) LocalVariable(TokenPosition::kNoSource, TokenPosition::kNoSource,
                            name, Object::dynamic_type());
  const VariableIndex next_index(GetNextLocalIndex());
  variable->set_index(next_index);
  return variable;
}
|
|
|
|
|
|
|
|
// Returns a constant instruction wrapping the canonical Integer for `value`.
ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
  const Integer& canonical =
      Integer::ZoneHandle(Z, Integer::NewCanonical(value));
  return new (Z) ConstantInstr(canonical);
}
|
|
|
|
|
|
|
|
// Returns an integer constant instruction for an unsigned value by delegating
// to Int64Constant(). The value must fit in a non-negative int64_t; kMaxInt64
// itself is representable, so the asserted bound is inclusive.
ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
  ASSERT(value <= static_cast<uint64_t>(kMaxInt64));
  return Int64Constant(static_cast<int64_t>(value));
}
|
|
|
|
|
|
|
|
// Returns a constant instruction for Bool::True() or Bool::False().
ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
  if (value) {
    return new (Z) ConstantInstr(Bool::True());
  }
  return new (Z) ConstantInstr(Bool::False());
}
|
|
|
|
|
|
|
|
// Returns a constant instruction for a String created from `value` in old
// space.
ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
  const String& text = String::ZoneHandle(Z, String::New(value, Heap::kOld));
  return new (Z) ConstantInstr(text);
}
|
|
|
|
|
|
|
|
// Returns a constant instruction holding the value of the static field
// _wordCharacterMap of the core library's _RegExp class. If evaluating the
// field yields an error, the error is reported via Report::LongJump.
ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
  const Library& core_lib = Library::Handle(Z, Library::CoreLibrary());
  const Class& regexp_cls =
      Class::Handle(Z, core_lib.LookupClassAllowPrivate(Symbols::_RegExp()));
  const Field& map_field = Field::ZoneHandle(
      Z,
      regexp_cls.LookupStaticFieldAllowPrivate(Symbols::_wordCharacterMap()));
  ASSERT(!map_field.IsNull());

  // The long jump below relies on a setjmp-based handler being on top.
  DEBUG_ASSERT(Thread::Current()->TopErrorHandlerIsSetJump());

  const auto& field_value =
      Object::Handle(Z, map_field.StaticConstFieldValue());
  if (field_value.IsError()) {
    Report::LongJump(Error::Cast(field_value));
  }

  const Instance& word_map =
      Instance::ZoneHandle(Z, Instance::RawCast(field_value.ptr()));
  return new (Z) ConstantInstr(word_map);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Builds a comparison of two already-bound values.
//
// The comparison is done in two steps: an instance call of the matching
// operator on (lhs, rhs), whose result is then strictly compared against the
// constant true. kNE has no operator of its own here; it calls the equality
// operator and uses a strict not-equal for the final check.
ComparisonInstr* IRRegExpMacroAssembler::Comparison(ComparisonKind kind,
                                                    Value* lhs,
                                                    Value* rhs) {
  // Only kNE inverts the final strict comparison.
  const Token::Kind strict_op =
      (kind == kNE) ? Token::kNE_STRICT : Token::kEQ_STRICT;

  Token::Kind call_op = Token::kILLEGAL;
  switch (kind) {
    case kEQ:
    case kNE:
      call_op = Token::kEQ;
      break;
    case kLT:
      call_op = Token::kLT;
      break;
    case kGT:
      call_op = Token::kGT;
      break;
    case kLTE:
      call_op = Token::kLTE;
      break;
    case kGTE:
      call_op = Token::kGTE;
      break;
    default:
      UNREACHABLE();
  }
  ASSERT(call_op != Token::kILLEGAL);

  // Emit the operator call first, then the `true` constant it is tested
  // against.
  Value* call_result =
      Bind(InstanceCall(InstanceCallDescriptor::FromToken(call_op), lhs, rhs));
  Value* true_value = Bind(BoolConstant(true));

  return new (Z) StrictCompareInstr(InstructionSource(), strict_op,
                                    call_result, true_value, true,
                                    GetNextDeoptId());
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Convenience overload: binds `lhs` and then `rhs` (in that order) and
// forwards to the Value*-based Comparison().
ComparisonInstr* IRRegExpMacroAssembler::Comparison(ComparisonKind kind,
                                                    Definition* lhs,
                                                    Definition* rhs) {
  // Bind in separate statements so the emission order is left-then-right.
  Value* bound_lhs = Bind(lhs);
  Value* bound_rhs = Bind(rhs);
  return Comparison(kind, bound_lhs, bound_rhs);
}
|
|
|
|
|
|
|
|
// Builds a static call to `function` with no arguments by forwarding an empty
// input list to the InputsArray-based overload.
StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
    const Function& function,
    ICData::RebindRule rebind_rule) const {
  InputsArray* no_inputs = new (Z) InputsArray(Z, 0);
  return StaticCall(function, no_inputs, rebind_rule);
}
|
|
|
|
|
|
|
|
// Builds a static call to `function` with a single argument by forwarding a
// one-element input list to the InputsArray-based overload.
StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
    const Function& function,
    Value* arg1,
    ICData::RebindRule rebind_rule) const {
  InputsArray* inputs = new (Z) InputsArray(Z, 1);
  inputs->Add(arg1);
  return StaticCall(function, inputs, rebind_rule);
}
|
|
|
|
|
|
|
|
// Builds a static call to `function` with two arguments by forwarding a
// two-element input list (arg1 first, then arg2) to the InputsArray-based
// overload.
StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
    const Function& function,
    Value* arg1,
    Value* arg2,
    ICData::RebindRule rebind_rule) const {
  InputsArray* inputs = new (Z) InputsArray(Z, 2);
  inputs->Add(arg1);
  inputs->Add(arg2);
  return StaticCall(function, inputs, rebind_rule);
}
|
|
|
|
|
|
|
|
// Zone-allocates the StaticCallInstr that the other StaticCall overloads
// funnel into. The call is built with a default InstructionSource, a type
// argument length of zero, Object::null_array(), the assembler's
// ic_data_array_ and a deopt id obtained from GetNextDeoptId().
StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
    const Function& function,
    InputsArray* arguments,
    ICData::RebindRule rebind_rule) const {
  const intptr_t kNoTypeArguments = 0;
  StaticCallInstr* const call = new (Z) StaticCallInstr(
      InstructionSource(), function, kNoTypeArguments, Object::null_array(),
      arguments, ic_data_array_, GetNextDeoptId(), rebind_rule);
  return call;
}
|
|
|
|
|
|
|
|
// Creates the instance call described by `desc` whose only input is `arg1`.
InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
    const InstanceCallDescriptor& desc,
    Value* arg1) const {
  InputsArray* const inputs = new (Z) InputsArray(Z, 1);
  inputs->Add(arg1);
  return InstanceCall(desc, inputs);
}
|
|
|
|
|
|
|
|
// Creates the instance call described by `desc` with the inputs `arg1`,
// `arg2` (added in that order).
InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
    const InstanceCallDescriptor& desc,
    Value* arg1,
    Value* arg2) const {
  InputsArray* const inputs = new (Z) InputsArray(Z, 2);
  inputs->Add(arg1);
  inputs->Add(arg2);
  return InstanceCall(desc, inputs);
}
|
|
|
|
|
|
|
|
// Creates the instance call described by `desc` with the inputs `arg1`,
// `arg2`, `arg3` (added in that order).
InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
    const InstanceCallDescriptor& desc,
    Value* arg1,
    Value* arg2,
    Value* arg3) const {
  InputsArray* const inputs = new (Z) InputsArray(Z, 3);
  inputs->Add(arg1);
  inputs->Add(arg2);
  inputs->Add(arg3);
  return InstanceCall(desc, inputs);
}
|
|
|
|
|
|
|
|
// Zone-allocates the InstanceCallInstr that the other InstanceCall overloads
// funnel into. Name, token kind and checked argument count come from `desc`;
// the call is built with a default InstructionSource, a type argument length
// of zero, Object::null_array(), the assembler's ic_data_array_ and a deopt
// id obtained from GetNextDeoptId().
InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
    const InstanceCallDescriptor& desc,
    InputsArray* arguments) const {
  const intptr_t kNoTypeArguments = 0;
  InstanceCallInstr* const call = new (Z) InstanceCallInstr(
      InstructionSource(), desc.name, desc.token_kind, arguments,
      kNoTypeArguments, Object::null_array(), desc.checked_argument_count,
      ic_data_array_, GetNextDeoptId());
  return call;
}
|
|
|
|
|
|
|
|
// Zone-allocates a LoadLocalInstr for `local` with a default
// InstructionSource. The instruction is only created here; it is not appended
// to the current block.
LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
  LoadLocalInstr* const load =
      new (Z) LoadLocalInstr(*local, InstructionSource());
  return load;
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Emits (via Do) a StoreLocalInstr that writes `value` into `local`.
void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local, Value* value) {
  StoreLocalInstr* const store =
      new (Z) StoreLocalInstr(*local, value, InstructionSource());
  Do(store);
}
|
|
|
|
|
|
|
|
// Records `instruction` as the instruction that subsequent emission links
// onto. Callers in this file pass NULL to mark the assembler as closed.
void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
  this->current_instruction_ = instruction;
}
|
|
|
|
|
|
|
|
// Appends `definition` to the current block, assigns it a freshly allocated
// temp index and returns a new zone-allocated Value wrapping it.
Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
  AppendInstruction(definition);

  // The temp index must be allocated after the append, because
  // AppendInstruction releases the temps consumed by the inputs first.
  const auto temp_index = temp_id_.Alloc();
  definition->set_temp_index(temp_index);

  Value* const result = new (Z) Value(definition);
  return result;
}
|
|
|
|
|
|
|
|
// Appends `definition` to the current block. Unlike Bind, no temp index is
// allocated and no Value is returned.
void IRRegExpMacroAssembler::Do(Definition* definition) {
  this->AppendInstruction(definition);
}
|
|
|
|
|
|
|
|
// Binds a read of `local`: a ConstantInstr holding its constant value when
// the variable is const, otherwise a LoadLocalInstr (the variable must not be
// captured in that case).
Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) {
  Definition* read = NULL;
  if (local.IsConst()) {
    read = new (Z) ConstantInstr(*local.ConstValue());
  } else {
    ASSERT(!local.is_captured());
    read = new (Z) LoadLocalInstr(local, InstructionSource());
  }
  return Bind(read);
}
|
|
|
|
|
|
|
|
// The V8 irregexp engine sometimes generates unreachable code by emitting a
// jmp that is not followed by a bind. That cannot be mirrored here, since it
// is impossible to append to a block once a jmp has closed it. When emission
// is attempted without a current instruction, assume we are doing the correct
// thing: output a warning when tracing and bind a throwaway block so that
// emission can continue.
#define HANDLE_DEAD_CODE_EMISSION()                                            \
  if (current_instruction_ == NULL) {                                          \
    if (FLAG_trace_irregexp) {                                                 \
      OS::PrintErr(                                                            \
          "WARNING: Attempting to append to a closed assembler. "              \
          "This could be either a bug or generation of dead code "             \
          "inherited from V8.\n");                                             \
    }                                                                          \
    BlockLabel unreachable_block;                                              \
    BindBlock(&unreachable_block);                                             \
  }
|
|
|
|
|
|
|
|
// Links `instruction` after the current instruction and makes it the new
// current instruction. One temp id is released per input of `instruction`.
void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
  // Opens a throwaway block first if the assembler is currently closed.
  HANDLE_DEAD_CODE_EMISSION();

  ASSERT(current_instruction_ != NULL);
  ASSERT(current_instruction_->next() == NULL);

  const auto consumed_inputs = instruction->InputCount();
  temp_id_.Dealloc(consumed_inputs);

  current_instruction_->LinkTo(instruction);
  set_current_instruction(instruction);
}
|
|
|
|
|
|
|
|
// Links `instruction` after the current instruction and then clears the
// current instruction, leaving the assembler closed. One temp id is released
// per input of `instruction`.
void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
  // Opens a throwaway block first if the assembler is already closed.
  HANDLE_DEAD_CODE_EMISSION();

  ASSERT(current_instruction_ != NULL);
  ASSERT(current_instruction_->next() == NULL);

  const auto consumed_inputs = instruction->InputCount();
  temp_id_.Dealloc(consumed_inputs);

  current_instruction_->LinkTo(instruction);
  set_current_instruction(NULL);
}
|
|
|
|
|
|
|
|
// Jumps to the block held by `to`, marking the label as linked. A NULL label
// means "backtrack".
void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
  if (to == NULL) {
    Backtrack();
    return;
  }

  to->SetLinked();
  GoTo(to->block());
}
|
|
|
|
|
|
|
|
// Closes the current block with a goto, and unsets current_instruction_.
|
|
|
|
// BindBlock() must be called before emission can continue.
|
|
|
|
void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
|
|
|
|
HANDLE_DEAD_CODE_EMISSION();
|
|
|
|
|
|
|
|
ASSERT(current_instruction_ != NULL);
|
|
|
|
ASSERT(current_instruction_->next() == NULL);
|
|
|
|
current_instruction_->Goto(to);
|
|
|
|
set_current_instruction(NULL);
|
|
|
|
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
// Creates a load of `local` and binds it, returning the resulting Value.
Value* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
  LoadLocalInstr* const load = LoadLocal(local);
  return Bind(load);
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::Print(const char* str) {
|
2020-01-09 01:37:27 +00:00
|
|
|
Print(Bind(new (Z) ConstantInstr(
|
|
|
|
String::ZoneHandle(Z, String::New(str, Heap::kOld)))));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
// Emits a static call to the core library's `print` function, passing
// `argument` as its single input.
void IRRegExpMacroAssembler::Print(Value* argument) {
  const Library& core_library = Library::Handle(Library::CoreLibrary());
  const Function& print_function = Function::ZoneHandle(
      Z, core_library.LookupFunctionAllowPrivate(Symbols::print()));
  StaticCallInstr* const call =
      StaticCall(print_function, argument, ICData::kStatic);
  Do(call);
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::PrintBlocks() {
|
|
|
|
for (intptr_t i = 0; i < blocks_.length(); i++) {
|
|
|
|
FlowGraphPrinter::PrintBlock(blocks_[i], false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Returns the fixed stack limit slack used by this assembler.
intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
  const intptr_t kStackLimitSlack = 32;
  return kStackLimitSlack;
}
|
|
|
|
|
|
|
|
// Emits code that adds `by` to current_position_. Nothing is emitted (apart
// from the trace tag) when `by` is zero.
void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
  TAG();
  if (by == 0) {
    return;
  }

  Value* const position_value = PushLocal(current_position_);
  Value* const delta_value = Bind(Int64Constant(by));
  Value* const advanced_value = Bind(Add(position_value, delta_value));
  StoreLocal(current_position_, advanced_value);
}
|
|
|
|
|
|
|
|
// Emits code that adds `by` to register `reg`. Nothing is emitted (apart from
// the trace tag) when `by` is zero.
void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
  TAG();
  ASSERT(reg >= 0);
  ASSERT(reg < registers_count_);

  if (by == 0) {
    return;
  }

  // The store's receiver and index are pushed before the current register
  // value is loaded; this emission order is significant.
  Value* const store_receiver = PushLocal(registers_);
  Value* const store_index = PushRegisterIndex(reg);
  Value* const old_value = LoadRegister(reg);
  Value* const delta_value = Bind(Int64Constant(by));
  Value* const new_value = Bind(Add(old_value, delta_value));
  StoreRegister(store_receiver, store_index, new_value);
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::Backtrack() {
|
|
|
|
TAG();
|
|
|
|
GoTo(backtrack_block_);
|
|
|
|
}
|
|
|
|
|
|
|
|
// A BindBlock is analogous to assigning a label to a basic block.
|
|
|
|
// If the BlockLabel does not yet contain a block, it is created.
|
|
|
|
// If there is a current instruction, append a goto to the bound block.
|
|
|
|
void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
|
2020-04-16 22:59:03 +00:00
|
|
|
ASSERT(!label->is_bound());
|
2015-07-07 21:43:32 +00:00
|
|
|
ASSERT(label->block()->next() == NULL);
|
|
|
|
|
2020-04-16 22:59:03 +00:00
|
|
|
label->BindTo(block_id_.Alloc());
|
2015-07-07 21:43:32 +00:00
|
|
|
blocks_.Add(label->block());
|
|
|
|
|
|
|
|
if (current_instruction_ != NULL) {
|
|
|
|
GoTo(label);
|
|
|
|
}
|
|
|
|
set_current_instruction(label->block());
|
|
|
|
|
|
|
|
// Print the id of the current block if tracing.
|
2020-01-09 01:37:27 +00:00
|
|
|
PRINT(Bind(Uint64Constant(label->block()->block_id())));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Allocates the next local id and returns its negation.
intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
  const intptr_t allocated_id = local_id_.Alloc();
  return -allocated_id;
}
|
|
|
|
|
|
|
|
// Emits an index-operator ([]) instance call on registers_ with the given
// register index and returns the bound result.
Value* IRRegExpMacroAssembler::LoadRegister(intptr_t index) {
  Value* const receiver_value = PushLocal(registers_);
  Value* const index_value = PushRegisterIndex(index);
  InstanceCallInstr* const element_load =
      InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
                   receiver_value, index_value);
  return Bind(element_load);
}
|
|
|
|
|
|
|
|
// Emits code storing the constant `value` into register `index`. The
// receiver, the index and the constant are bound in that order before
// delegating to the Value-based StoreRegister overload.
void IRRegExpMacroAssembler::StoreRegister(intptr_t index, intptr_t value) {
  Value* const receiver_value = PushLocal(registers_);
  Value* const index_value = PushRegisterIndex(index);
  Value* const constant_value = Bind(Uint64Constant(value));
  StoreRegister(receiver_value, index_value, constant_value);
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
// Emits an index-assignment ([]=) instance call on `registers` with `index`
// and `value` as the remaining inputs.
void IRRegExpMacroAssembler::StoreRegister(Value* registers,
                                           Value* index,
                                           Value* value) {
  TAG();
  InstanceCallInstr* const element_store =
      InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
                   registers, index, value);
  Do(element_store);
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
// Binds `index` as a constant. As a side effect registers_count_ is grown so
// that it always exceeds the largest register index seen so far.
Value* IRRegExpMacroAssembler::PushRegisterIndex(intptr_t index) {
  if (index >= registers_count_) {
    registers_count_ = index + 1;
  }
  Definition* const index_constant = Uint64Constant(index);
  return Bind(index_constant);
}
|
|
|
|
|
|
|
|
// Compares the current character with `c` for equality and passes the
// comparison to BranchOrBacktrack with `on_equal` as the label.
void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
  TAG();
  Definition* current_char = LoadLocal(current_character_);
  Definition* expected_char = Uint64Constant(c);
  ComparisonInstr* const is_equal =
      Comparison(kEQ, current_char, expected_char);
  BranchOrBacktrack(is_equal, on_equal);
}
|
|
|
|
|
|
|
|
// Builds a "current character > limit" comparison and passes it to
// BranchOrBacktrack with `on_greater` as the label.
void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
                                              BlockLabel* on_greater) {
  TAG();
  Definition* current_char = LoadLocal(current_character_);
  Definition* limit_constant = Uint64Constant(limit);
  ComparisonInstr* const is_greater =
      Comparison(kGT, current_char, limit_constant);
  BranchOrBacktrack(is_greater, on_greater);
}
|
|
|
|
|
|
|
|
// Builds the "at start of input" test and passes it to BranchOrBacktrack
// with `on_at_start` as the label.
void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
  TAG();

  // We are at the start of the input exactly when the current offset equals
  // the negated string length, i.e. (offset == string_length * -1).
  Value* const length_value = PushLocal(string_param_length_);
  Definition* negated_length = InstanceCall(
      InstanceCallDescriptor::FromToken(Token::kNEGATE), length_value);
  Definition* current_offset = LoadLocal(current_position_);
  ComparisonInstr* const is_at_start =
      Comparison(kEQ, negated_length, current_offset);
  BranchOrBacktrack(is_at_start, on_at_start);
}
|
|
|
|
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
// cp_offset => offset from the current (character) pointer
|
|
|
|
// This offset may be negative due to traversing backwards during lookbehind.
|
|
|
|
void IRRegExpMacroAssembler::CheckNotAtStart(intptr_t cp_offset,
|
|
|
|
BlockLabel* on_not_at_start) {
|
2015-07-07 21:43:32 +00:00
|
|
|
TAG();
|
|
|
|
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
// Are we at the start of the input, i.e. is (offset == string_length * -1)?
|
2020-01-09 01:37:27 +00:00
|
|
|
auto neg_len_def =
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
|
2020-01-09 01:37:27 +00:00
|
|
|
PushLocal(string_param_length_)));
|
2020-06-25 03:36:41 +00:00
|
|
|
auto current_pos_def = PushLocal(current_position_);
|
|
|
|
auto cp_offset_def = Bind(Int64Constant(cp_offset));
|
|
|
|
auto offset_def = Bind(Add(current_pos_def, cp_offset_def));
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def), on_not_at_start);
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Builds a kLT comparison of the current character against |limit| and hands
// it to BranchOrBacktrack together with |on_less|.
void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
                                              BlockLabel* on_less) {
  TAG();
  auto* current_char_def = LoadLocal(current_character_);
  auto* limit_def = Uint64Constant(limit);
  BranchOrBacktrack(Comparison(kLT, current_char_def, limit_def), on_less);
}
|
|
|
|
|
|
|
|
// Compares the value returned by PeekStack() with the current position.
//
// When the two differ, control is sent to a local fall-through block that is
// bound at the very end, so nothing else emitted here runs. Otherwise the
// stack entry is popped (its value is discarded) and BranchOrBacktrack is
// invoked for |on_equal|.
void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
  TAG();

  BlockLabel fallthrough;

  Definition* head = PeekStack();
  Definition* cur_pos_def = LoadLocal(current_position_);
  BranchOrBacktrack(Comparison(kNE, head, cur_pos_def), &fallthrough);

  // Pop, throwing away the value.
  Do(PopStack());

  // No comparison is supplied for this transfer.
  // NOTE(review): presumably a null comparison makes the transfer
  // unconditional -- confirm against BranchOrBacktrack.
  BranchOrBacktrack(nullptr, on_equal);

  BindBlock(&fallthrough);
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
|
|
|
|
intptr_t start_reg,
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
bool read_backward,
|
2019-04-29 09:11:48 +00:00
|
|
|
bool unicode,
|
2015-07-07 21:43:32 +00:00
|
|
|
BlockLabel* on_no_match) {
|
|
|
|
TAG();
|
|
|
|
ASSERT(start_reg + 1 <= registers_count_);
|
|
|
|
|
|
|
|
BlockLabel fallthrough;
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* end_push = LoadRegister(start_reg + 1);
|
|
|
|
Value* start_push = LoadRegister(start_reg);
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
|
|
|
|
|
|
|
|
// The length of a capture should not be negative. This can only happen
|
|
|
|
// if the end of the capture is unrecorded, or at a point earlier than
|
|
|
|
// the start of the capture.
|
|
|
|
// BranchOrBacktrack(less, on_no_match);
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kLT, LoadLocal(capture_length_), Uint64Constant(0)),
|
|
|
|
on_no_match);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// If length is zero, either the capture is empty or it is completely
|
|
|
|
// uncaptured. In either case succeed immediately.
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kEQ, LoadLocal(capture_length_), Uint64Constant(0)),
|
|
|
|
&fallthrough);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* pos_push = nullptr;
|
|
|
|
Value* len_push = nullptr;
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
|
|
|
|
if (!read_backward) {
|
|
|
|
// Check that there are sufficient characters left in the input.
|
|
|
|
pos_push = PushLocal(current_position_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kGT,
|
|
|
|
InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
|
|
|
|
pos_push, len_push),
|
|
|
|
Uint64Constant(0)),
|
|
|
|
on_no_match);
|
|
|
|
}
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
pos_push = PushLocal(current_position_);
|
|
|
|
len_push = PushLocal(string_param_length_);
|
|
|
|
StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
|
|
|
|
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
if (read_backward) {
|
|
|
|
// First check that there are enough characters before this point in
|
|
|
|
// the string that we can match the backreference.
|
|
|
|
BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
|
|
|
|
LoadLocal(capture_length_)),
|
|
|
|
on_no_match);
|
|
|
|
|
|
|
|
// The string to check is before the current position, not at it.
|
|
|
|
pos_push = PushLocal(match_start_index_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
StoreLocal(match_start_index_, Bind(Sub(pos_push, len_push)));
|
|
|
|
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
pos_push = LoadRegister(start_reg);
|
2015-07-07 21:43:32 +00:00
|
|
|
len_push = PushLocal(string_param_length_);
|
|
|
|
StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
|
|
|
|
|
|
|
|
pos_push = PushLocal(match_start_index_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
|
|
|
|
|
|
|
|
BlockLabel success;
|
|
|
|
if (mode_ == ASCII) {
|
|
|
|
BlockLabel loop_increment;
|
|
|
|
BlockLabel loop;
|
|
|
|
BindBlock(&loop);
|
|
|
|
|
|
|
|
StoreLocal(char_in_capture_, CharacterAt(capture_start_index_));
|
|
|
|
StoreLocal(char_in_match_, CharacterAt(match_start_index_));
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kEQ, LoadLocal(char_in_capture_), LoadLocal(char_in_match_)),
|
|
|
|
&loop_increment);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Mismatch, try case-insensitive match (converting letters to lower-case).
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* match_char_push = PushLocal(char_in_match_);
|
|
|
|
Value* mask_push = Bind(Uint64Constant(0x20));
|
2016-11-08 21:54:47 +00:00
|
|
|
StoreLocal(
|
|
|
|
char_in_match_,
|
|
|
|
Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_OR),
|
|
|
|
match_char_push, mask_push)));
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
BlockLabel convert_capture;
|
|
|
|
BlockLabel on_not_in_range;
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kLT, LoadLocal(char_in_match_), Uint64Constant('a')),
|
|
|
|
&on_not_in_range);
|
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kGT, LoadLocal(char_in_match_), Uint64Constant('z')),
|
|
|
|
&on_not_in_range);
|
2015-07-07 21:43:32 +00:00
|
|
|
GoTo(&convert_capture);
|
|
|
|
BindBlock(&on_not_in_range);
|
|
|
|
|
|
|
|
// Latin-1: Check for values in range [224,254] but not 247.
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kLT, LoadLocal(char_in_match_), Uint64Constant(224)),
|
|
|
|
on_no_match);
|
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kGT, LoadLocal(char_in_match_), Uint64Constant(254)),
|
|
|
|
on_no_match);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kEQ, LoadLocal(char_in_match_), Uint64Constant(247)),
|
|
|
|
on_no_match);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Also convert capture character.
|
|
|
|
BindBlock(&convert_capture);
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* capture_char_push = PushLocal(char_in_capture_);
|
|
|
|
mask_push = Bind(Uint64Constant(0x20));
|
2016-11-08 21:54:47 +00:00
|
|
|
StoreLocal(
|
|
|
|
char_in_capture_,
|
|
|
|
Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_OR),
|
|
|
|
capture_char_push, mask_push)));
|
|
|
|
|
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kNE, LoadLocal(char_in_match_), LoadLocal(char_in_capture_)),
|
|
|
|
on_no_match);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
BindBlock(&loop_increment);
|
|
|
|
|
|
|
|
// Increment indexes into capture and match strings.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* index_push = PushLocal(capture_start_index_);
|
|
|
|
Value* inc_push = Bind(Uint64Constant(1));
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
|
|
|
|
|
|
|
|
index_push = PushLocal(match_start_index_);
|
2020-01-09 01:37:27 +00:00
|
|
|
inc_push = Bind(Uint64Constant(1));
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
|
|
|
|
|
|
|
|
// Compare to end of match, and loop if not done.
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
|
2015-07-07 21:43:32 +00:00
|
|
|
LoadLocal(match_end_index_)),
|
|
|
|
&loop);
|
|
|
|
} else {
|
|
|
|
ASSERT(mode_ == UC16);
|
|
|
|
|
|
|
|
Value* string_value = Bind(LoadLocal(string_param_));
|
|
|
|
Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
|
|
|
|
Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
|
|
|
|
Value* length_value = Bind(LoadLocal(capture_length_));
|
|
|
|
|
2019-04-29 09:11:48 +00:00
|
|
|
Definition* is_match_def;
|
|
|
|
|
|
|
|
if (unicode) {
|
|
|
|
is_match_def = new (Z) CaseInsensitiveCompareInstr(
|
|
|
|
string_value, lhs_index_value, rhs_index_value, length_value,
|
|
|
|
kCaseInsensitiveCompareUTF16RuntimeEntry, specialization_cid_);
|
|
|
|
} else {
|
|
|
|
is_match_def = new (Z) CaseInsensitiveCompareInstr(
|
|
|
|
string_value, lhs_index_value, rhs_index_value, length_value,
|
|
|
|
kCaseInsensitiveCompareUCS2RuntimeEntry, specialization_cid_);
|
|
|
|
}
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
|
|
|
|
on_no_match);
|
|
|
|
}
|
|
|
|
|
|
|
|
BindBlock(&success);
|
|
|
|
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
if (read_backward) {
|
|
|
|
// Move current character position to start of match.
|
|
|
|
pos_push = PushLocal(current_position_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
StoreLocal(current_position_, Bind(Sub(pos_push, len_push)));
|
|
|
|
} else {
|
|
|
|
// Move current character position to position after match.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* match_end_push = PushLocal(match_end_index_);
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
len_push = PushLocal(string_param_length_);
|
|
|
|
StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
|
|
|
|
}
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
BindBlock(&fallthrough);
|
|
|
|
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
void IRRegExpMacroAssembler::CheckNotBackReference(intptr_t start_reg,
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
bool read_backward,
|
2016-11-08 21:54:47 +00:00
|
|
|
BlockLabel* on_no_match) {
|
2015-07-07 21:43:32 +00:00
|
|
|
TAG();
|
|
|
|
ASSERT(start_reg + 1 <= registers_count_);
|
|
|
|
|
|
|
|
BlockLabel fallthrough;
|
|
|
|
BlockLabel success;
|
|
|
|
|
|
|
|
// Find length of back-referenced capture.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* end_push = LoadRegister(start_reg + 1);
|
|
|
|
Value* start_push = LoadRegister(start_reg);
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
|
|
|
|
|
|
|
|
// Fail on partial or illegal capture (start of capture after end of capture).
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kLT, LoadLocal(capture_length_), Uint64Constant(0)),
|
|
|
|
on_no_match);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Succeed on empty capture (including no capture)
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kEQ, LoadLocal(capture_length_), Uint64Constant(0)),
|
|
|
|
&fallthrough);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* pos_push = nullptr;
|
|
|
|
Value* len_push = nullptr;
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
|
|
|
|
if (!read_backward) {
|
|
|
|
// Check that there are sufficient characters left in the input.
|
|
|
|
pos_push = PushLocal(current_position_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kGT,
|
|
|
|
InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
|
|
|
|
pos_push, len_push),
|
|
|
|
Uint64Constant(0)),
|
|
|
|
on_no_match);
|
|
|
|
}
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Compute pointers to match string and capture string.
|
|
|
|
pos_push = PushLocal(current_position_);
|
|
|
|
len_push = PushLocal(string_param_length_);
|
|
|
|
StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
|
|
|
|
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
if (read_backward) {
|
|
|
|
// First check that there are enough characters before this point in
|
|
|
|
// the string that we can match the backreference.
|
|
|
|
BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
|
|
|
|
LoadLocal(capture_length_)),
|
|
|
|
on_no_match);
|
|
|
|
|
|
|
|
// The string to check is before the current position, not at it.
|
|
|
|
pos_push = PushLocal(match_start_index_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
StoreLocal(match_start_index_, Bind(Sub(pos_push, len_push)));
|
|
|
|
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
pos_push = LoadRegister(start_reg);
|
2015-07-07 21:43:32 +00:00
|
|
|
len_push = PushLocal(string_param_length_);
|
|
|
|
StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
|
|
|
|
|
|
|
|
pos_push = PushLocal(match_start_index_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
|
|
|
|
|
|
|
|
BlockLabel loop;
|
|
|
|
BindBlock(&loop);
|
|
|
|
|
|
|
|
StoreLocal(char_in_capture_, CharacterAt(capture_start_index_));
|
|
|
|
StoreLocal(char_in_match_, CharacterAt(match_start_index_));
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kNE, LoadLocal(char_in_capture_), LoadLocal(char_in_match_)),
|
|
|
|
on_no_match);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
// Increment indexes into capture and match strings.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* index_push = PushLocal(capture_start_index_);
|
|
|
|
Value* inc_push = Bind(Uint64Constant(1));
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
|
|
|
|
|
|
|
|
index_push = PushLocal(match_start_index_);
|
2020-01-09 01:37:27 +00:00
|
|
|
inc_push = Bind(Uint64Constant(1));
|
2015-07-07 21:43:32 +00:00
|
|
|
StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
|
|
|
|
|
|
|
|
// Check if we have reached end of match area.
|
2016-11-08 21:54:47 +00:00
|
|
|
BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
|
2015-07-07 21:43:32 +00:00
|
|
|
LoadLocal(match_end_index_)),
|
|
|
|
&loop);
|
|
|
|
|
|
|
|
BindBlock(&success);
|
|
|
|
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
if (read_backward) {
|
|
|
|
// Move current character position to start of match.
|
|
|
|
pos_push = PushLocal(current_position_);
|
|
|
|
len_push = PushLocal(capture_length_);
|
|
|
|
StoreLocal(current_position_, Bind(Sub(pos_push, len_push)));
|
|
|
|
} else {
|
|
|
|
// Move current character position to position after match.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* match_end_push = PushLocal(match_end_index_);
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
len_push = PushLocal(string_param_length_);
|
|
|
|
StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
|
|
|
|
}
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
BindBlock(&fallthrough);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits a comparison of the current character against |c| and hands it to
// BranchOrBacktrack with |on_not_equal| as the target for the "differs" case.
void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
                                               BlockLabel* on_not_equal) {
  TAG();
  Definition* const actual = LoadLocal(current_character_);
  Definition* const expected = Uint64Constant(c);
  BranchOrBacktrack(Comparison(kNE, actual, expected), on_not_equal);
}
|
|
|
|
|
|
|
|
// Emits `(current_character & mask) == c` and passes the comparison to
// BranchOrBacktrack with |on_equal| as the target.
void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
                                                    uint32_t mask,
                                                    BlockLabel* on_equal) {
  TAG();

  // Operands of the bitwise AND are bound in order: character, then mask.
  Value* const character = Bind(LoadLocal(current_character_));
  Value* const mask_value = Bind(Uint64Constant(mask));
  Definition* const masked = InstanceCall(
      InstanceCallDescriptor::FromToken(Token::kBIT_AND), character,
      mask_value);

  Definition* const expected = Uint64Constant(c);
  BranchOrBacktrack(Comparison(kEQ, masked, expected), on_equal);
}
|
|
|
|
|
|
|
|
// Emits `(current_character & mask) != c` and passes the comparison to
// BranchOrBacktrack with |on_not_equal| as the target.
void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
    uint32_t c,
    uint32_t mask,
    BlockLabel* on_not_equal) {
  TAG();

  // Operands of the bitwise AND are bound in order: character, then mask.
  Value* const character = Bind(LoadLocal(current_character_));
  Value* const mask_value = Bind(Uint64Constant(mask));
  Definition* const masked = InstanceCall(
      InstanceCallDescriptor::FromToken(Token::kBIT_AND), character,
      mask_value);

  Definition* const expected = Uint64Constant(c);
  BranchOrBacktrack(Comparison(kNE, masked, expected), on_not_equal);
}
|
|
|
|
|
|
|
|
// Emits `((current_character - minus) & mask) != c` and passes the comparison
// to BranchOrBacktrack with |on_not_equal| as the target.
void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
    uint16_t c,
    uint16_t minus,
    uint16_t mask,
    BlockLabel* on_not_equal) {
  TAG();
  ASSERT(minus < Utf16::kMaxCodeUnit);  // NOLINT

  // Step 1: current_character - minus.
  Value* const character = Bind(LoadLocal(current_character_));
  Value* const subtrahend = Bind(Uint64Constant(minus));
  Value* const difference = Bind(Sub(character, subtrahend));

  // Step 2: (current_character - minus) & mask.
  Value* const mask_value = Bind(Uint64Constant(mask));
  Definition* const masked = InstanceCall(
      InstanceCallDescriptor::FromToken(Token::kBIT_AND), difference,
      mask_value);

  // Step 3: compare against the expected value.
  Definition* const expected = Uint64Constant(c);
  BranchOrBacktrack(Comparison(kNE, masked, expected), on_not_equal);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Emits code that goes to |on_in_range| when the current character lies in
// the inclusive range [from, to]; otherwise control continues in the block
// bound at the end of this function.
void IRRegExpMacroAssembler::CheckCharacterInRange(uint16_t from,
                                                   uint16_t to,
                                                   BlockLabel* on_in_range) {
  TAG();
  ASSERT(from <= to);

  // TODO(zerny): All range comparisons could be done cheaper with unsigned
  // compares. This pattern repeats in various places.

  BlockLabel out_of_range;

  // character < from  => out of range.
  Definition* const below_lhs = LoadLocal(current_character_);
  Definition* const lower_bound = Uint64Constant(from);
  BranchOrBacktrack(Comparison(kLT, below_lhs, lower_bound), &out_of_range);

  // character > to  => out of range.
  Definition* const above_lhs = LoadLocal(current_character_);
  Definition* const upper_bound = Uint64Constant(to);
  BranchOrBacktrack(Comparison(kGT, above_lhs, upper_bound), &out_of_range);

  // Both bounds hold: no comparison is supplied for the in-range target.
  BranchOrBacktrack(NULL, on_in_range);

  BindBlock(&out_of_range);
}
|
|
|
|
|
|
|
|
// Emits two comparisons (character < from, character > to), each handed to
// BranchOrBacktrack with |on_not_in_range| as the target.
void IRRegExpMacroAssembler::CheckCharacterNotInRange(
    uint16_t from,
    uint16_t to,
    BlockLabel* on_not_in_range) {
  TAG();
  ASSERT(from <= to);

  // Below the lower bound.
  Definition* const below_lhs = LoadLocal(current_character_);
  Definition* const lower_bound = Uint64Constant(from);
  BranchOrBacktrack(Comparison(kLT, below_lhs, lower_bound), on_not_in_range);

  // Above the upper bound.
  Definition* const above_lhs = LoadLocal(current_character_);
  Definition* const upper_bound = Uint64Constant(to);
  BranchOrBacktrack(Comparison(kGT, above_lhs, upper_bound), on_not_in_range);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
void IRRegExpMacroAssembler::CheckBitInTable(const TypedData& table,
|
|
|
|
BlockLabel* on_bit_set) {
|
2015-07-07 21:43:32 +00:00
|
|
|
TAG();
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* table_push = Bind(new (Z) ConstantInstr(table));
|
|
|
|
Value* index_push = PushLocal(current_character_);
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* mask_push = Bind(Uint64Constant(kTableSize - 1));
|
|
|
|
index_push =
|
2016-11-08 21:54:47 +00:00
|
|
|
Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
|
2020-01-09 01:37:27 +00:00
|
|
|
index_push, mask_push));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
Definition* byte_def = InstanceCall(
|
|
|
|
InstanceCallDescriptor::FromToken(Token::kINDEX), table_push, index_push);
|
2015-07-07 21:43:32 +00:00
|
|
|
Definition* zero_def = Int64Constant(0);
|
|
|
|
|
|
|
|
BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits specialized matching code for the character class identified by
// |type|, targeting |on_no_match| when the current character is not in the
// class. Returns true when specialized code was emitted and false when no
// custom implementation is provided for |type| in the current mode.
bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
    uint16_t type,
    BlockLabel* on_no_match) {
  TAG();

  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check
  switch (type) {
    case 's': {
      // Space characters; only the ASCII-mode variant is specialized here.
      if (mode_ != ASCII) {
        return false;
      }
      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
      BlockLabel matched;
      // Space (' ').
      Definition* const space_lhs = LoadLocal(current_character_);
      Definition* const space = Uint64Constant(' ');
      BranchOrBacktrack(Comparison(kEQ, space_lhs, space), &matched);
      // Check range 0x09..0x0d.
      CheckCharacterInRange('\t', '\r', &matched);
      // \u00a0 (NBSP).
      Definition* const nbsp_lhs = LoadLocal(current_character_);
      Definition* const nbsp = Uint64Constant(0x00a0);
      BranchOrBacktrack(Comparison(kNE, nbsp_lhs, nbsp), on_no_match);
      BindBlock(&matched);
      return true;
    }

    case 'S':
      // The emitted code for generic character classes is good enough.
      return false;

    case 'd':
      // Match ASCII digits ('0'..'9')
      CheckCharacterNotInRange('0', '9', on_no_match);
      return true;

    case 'D':
      // Match non ASCII-digits
      CheckCharacterInRange('0', '9', on_no_match);
      return true;

    case '.': {
      // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
      // The two Unicode line terminators are only tested in UC16 mode.
      const uint64_t line_terminators[] = {'\n', '\r', 0x2028, 0x2029};
      const intptr_t tested = (mode_ == UC16) ? 4 : 2;
      for (intptr_t i = 0; i < tested; ++i) {
        Definition* const lhs = LoadLocal(current_character_);
        Definition* const terminator = Uint64Constant(line_terminators[i]);
        BranchOrBacktrack(Comparison(kEQ, lhs, terminator), on_no_match);
      }
      return true;
    }

    case 'w': {
      if (mode_ != ASCII) {
        // Table is 128 entries, so all ASCII characters can be tested.
        Definition* const lhs = LoadLocal(current_character_);
        Definition* const last_word_char = Uint64Constant('z');
        BranchOrBacktrack(Comparison(kGT, lhs, last_word_char), on_no_match);
      }

      // Look the character up in the word-character map; a zero entry means
      // "not a word character".
      Value* const map_value = Bind(WordCharacterMapConstant());
      Value* const index_value = PushLocal(current_character_);
      Definition* const entry = InstanceCall(
          InstanceCallDescriptor::FromToken(Token::kINDEX), map_value,
          index_value);
      Definition* const zero = Int64Constant(0);
      BranchOrBacktrack(Comparison(kEQ, entry, zero), on_no_match);

      return true;
    }

    case 'W': {
      BlockLabel past_table_check;
      const bool needs_range_guard = (mode_ != ASCII);
      if (needs_range_guard) {
        // Table is 128 entries, so all ASCII characters can be tested.
        Definition* const lhs = LoadLocal(current_character_);
        Definition* const last_word_char = Uint64Constant('z');
        BranchOrBacktrack(Comparison(kGT, lhs, last_word_char),
                          &past_table_check);
      }

      // TODO(zerny): Refactor to use CheckBitInTable if possible.

      // A non-zero map entry means the character is a word character, which
      // is a mismatch for this class.
      Value* const map_value = Bind(WordCharacterMapConstant());
      Value* const index_value = PushLocal(current_character_);
      Definition* const entry = InstanceCall(
          InstanceCallDescriptor::FromToken(Token::kINDEX), map_value,
          index_value);
      Definition* const zero = Int64Constant(0);
      BranchOrBacktrack(Comparison(kNE, entry, zero), on_no_match);

      if (needs_range_guard) {
        BindBlock(&past_table_check);
      }
      return true;
    }

    // Non-standard classes (with no syntactic shorthand) used internally.
    case '*':
      // Match any character.
      return true;

    case 'n': {
      // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
      // The opposite of '.'. The two Unicode line terminators are only
      // tested in UC16 mode.
      BlockLabel matched;
      const uint64_t line_terminators[] = {'\n', '\r', 0x2028, 0x2029};
      const intptr_t tested = (mode_ == UC16) ? 4 : 2;
      for (intptr_t i = 0; i < tested; ++i) {
        Definition* const lhs = LoadLocal(current_character_);
        Definition* const terminator = Uint64Constant(line_terminators[i]);
        BranchOrBacktrack(Comparison(kEQ, lhs, terminator), &matched);
      }
      // None of the terminators matched.
      BranchOrBacktrack(NULL, on_no_match);
      BindBlock(&matched);
      return true;
    }

    // No custom implementation (yet): s(uint16_t), S(uint16_t).
    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::Fail() {
|
|
|
|
TAG();
|
|
|
|
ASSERT(FAILURE == 0); // Return value for failure is zero.
|
|
|
|
if (!global()) {
|
|
|
|
UNREACHABLE(); // Dart regexps are always global.
|
|
|
|
}
|
|
|
|
GoTo(exit_block_);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emits `register[reg] >= comparand` and passes the comparison to
// BranchOrBacktrack with |if_ge| as the target.
void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
                                          intptr_t comparand,
                                          BlockLabel* if_ge) {
  TAG();
  Value* const register_value = LoadRegister(reg);
  Value* const limit = Bind(Int64Constant(comparand));
  BranchOrBacktrack(Comparison(kGTE, register_value, limit), if_ge);
}
|
|
|
|
|
|
|
|
// Emits `register[reg] < comparand` and passes the comparison to
// BranchOrBacktrack with |if_lt| as the target.
void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
                                          intptr_t comparand,
                                          BlockLabel* if_lt) {
  TAG();
  Value* const register_value = LoadRegister(reg);
  Value* const limit = Bind(Int64Constant(comparand));
  BranchOrBacktrack(Comparison(kLT, register_value, limit), if_lt);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Emits `register[reg] == current_position` and passes the comparison to
// BranchOrBacktrack with |if_eq| as the target.
void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq) {
  TAG();
  Value* const register_value = LoadRegister(reg);
  Value* const position = Bind(LoadLocal(current_position_));
  BranchOrBacktrack(Comparison(kEQ, register_value, position), if_eq);
}
|
|
|
|
|
|
|
|
// Identifies this macro assembler as the IR-based implementation.
RegExpMacroAssembler::IrregexpImplementation
IRRegExpMacroAssembler::Implementation() {
  return kIRImplementation;
}
|
|
|
|
|
|
|
|
// Loads |characters| characters at |cp_offset| relative to the current
// position. When |check_bounds| is set, a position check targeting
// |on_end_of_input| is emitted first.
void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
                                                  BlockLabel* on_end_of_input,
                                                  bool check_bounds,
                                                  intptr_t characters) {
  TAG();
  ASSERT(cp_offset < (1 << 30));  // Be sane! (And ensure negation works)
  if (check_bounds) {
    // For a non-negative offset the last character of the load is the one to
    // check; for a negative offset the offset itself is checked.
    const intptr_t offset_to_check =
        (cp_offset >= 0) ? (cp_offset + characters - 1) : cp_offset;
    CheckPosition(offset_to_check, on_end_of_input);
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::PopCurrentPosition() {
|
|
|
|
TAG();
|
|
|
|
StoreLocal(current_position_, Bind(PopStack()));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pops the top of the backtracking stack into register |reg|.
void IRRegExpMacroAssembler::PopRegister(intptr_t reg) {
  TAG();
  ASSERT(reg < registers_count_);
  // The store operands are emitted in order: registers array, index, value.
  Value* const registers_array = PushLocal(registers_);
  Value* const register_index = PushRegisterIndex(reg);
  Value* const popped = Bind(PopStack());
  StoreRegister(registers_array, register_index, popped);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Increments stack_pointer_ by one and then stores |definition| into stack_
// at the new stack pointer.
void IRRegExpMacroAssembler::PushStack(Definition* definition) {
  Value* const stack_array = PushLocal(stack_);

  // stack_pointer_ = stack_pointer_ + 1
  Value* const old_pointer = PushLocal(stack_pointer_);
  Value* const one = Bind(Uint64Constant(1));
  Value* const incremented = Bind(Add(old_pointer, one));
  StoreLocal(stack_pointer_, incremented);

  // Re-read the (now incremented) stack pointer to use as the store index.
  Value* const slot_index = PushLocal(stack_pointer_);
  // TODO(zerny): bind value and push could break stack discipline.
  Value* const pushed_value = Bind(definition);
  Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
                  stack_array, slot_index, pushed_value));
}
|
|
|
|
|
|
|
|
// Decrements stack_pointer_ by one and returns an (unbound) indexed load of
// stack_ at the stack pointer value read before the decrement.
Definition* IRRegExpMacroAssembler::PopStack() {
  Value* const stack_array = PushLocal(stack_);
  // Two reads of the stack pointer: one serves as the load index, the other
  // feeds the decrement.
  Value* const top_index = PushLocal(stack_pointer_);
  Value* const pointer_for_decrement = PushLocal(stack_pointer_);

  Value* const one = Bind(Uint64Constant(1));
  Value* const decremented = Bind(Sub(pointer_for_decrement, one));
  StoreLocal(stack_pointer_, decremented);

  return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
                      stack_array, top_index);
}
|
|
|
|
|
|
|
|
// Returns an (unbound) indexed load of stack_ at stack_pointer_ without
// modifying the stack pointer.
Definition* IRRegExpMacroAssembler::PeekStack() {
  Value* const stack_array = PushLocal(stack_);
  Value* const top_index = PushLocal(stack_pointer_);
  return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
                      stack_array, top_index);
}
|
|
|
|
|
|
|
|
// Pushes the location corresponding to label to the backtracking stack.
|
|
|
|
void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
|
|
|
|
TAG();
|
|
|
|
|
|
|
|
// Ensure that targets of indirect jumps are never accessed through a
|
|
|
|
// normal control flow instructions by creating a new block for each backtrack
|
|
|
|
// target.
|
|
|
|
IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
|
|
|
|
|
|
|
|
// Add a fake edge from the graph entry for data flow analysis.
|
|
|
|
entry_block_->AddIndirectEntry(indirect_target);
|
|
|
|
|
|
|
|
ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
|
|
|
|
PushStack(offset);
|
|
|
|
CheckStackLimit();
|
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::PushCurrentPosition() {
|
|
|
|
TAG();
|
|
|
|
PushStack(LoadLocal(current_position_));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pushes the value of register |reg| onto the backtracking stack and then
// emits a stack limit check.
void IRRegExpMacroAssembler::PushRegister(intptr_t reg) {
  TAG();
  // TODO(zerny): Refactor PushStack so it can be reused here.
  Value* const stack_array = PushLocal(stack_);

  // stack_pointer_ = stack_pointer_ + 1
  Value* const old_pointer = PushLocal(stack_pointer_);
  Value* const one = Bind(Uint64Constant(1));
  Value* const incremented = Bind(Add(old_pointer, one));
  StoreLocal(stack_pointer_, incremented);

  // Re-read the (now incremented) stack pointer to use as the store index.
  Value* const slot_index = PushLocal(stack_pointer_);
  // TODO(zerny): bind value and push could break stack discipline.
  Value* const register_value = LoadRegister(reg);
  Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
                  stack_array, slot_index, register_value));
  CheckStackLimit();
}
|
|
|
|
|
|
|
|
// Checks that (stack.capacity - stack_limit_slack) > stack_pointer.
|
|
|
|
// This ensures that up to stack_limit_slack stack pushes can be
|
|
|
|
// done without exhausting the stack space. If the check fails the
|
|
|
|
// stack will be grown.
|
|
|
|
void IRRegExpMacroAssembler::CheckStackLimit() {
|
|
|
|
TAG();
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* stack_push = PushLocal(stack_);
|
|
|
|
Value* length_push =
|
2016-11-08 21:54:47 +00:00
|
|
|
Bind(InstanceCall(InstanceCallDescriptor(String::ZoneHandle(
|
|
|
|
Field::GetterSymbol(Symbols::Length()))),
|
2020-01-09 01:37:27 +00:00
|
|
|
stack_push));
|
|
|
|
Value* capacity_push =
|
|
|
|
Bind(Sub(length_push, Bind(Uint64Constant(stack_limit_slack()))));
|
|
|
|
Value* stack_pointer_push = PushLocal(stack_pointer_);
|
2017-05-25 17:12:19 +00:00
|
|
|
BranchInstr* branch = new (Z) BranchInstr(
|
|
|
|
Comparison(kGT, capacity_push, stack_pointer_push), GetNextDeoptId());
|
2015-07-07 21:43:32 +00:00
|
|
|
CloseBlockWith(branch);
|
|
|
|
|
|
|
|
BlockLabel grow_stack;
|
|
|
|
BlockLabel fallthrough;
|
2016-11-08 21:54:47 +00:00
|
|
|
*branch->true_successor_address() = TargetWithJoinGoto(fallthrough.block());
|
|
|
|
*branch->false_successor_address() = TargetWithJoinGoto(grow_stack.block());
|
2015-07-07 21:43:32 +00:00
|
|
|
|
|
|
|
BindBlock(&grow_stack);
|
|
|
|
GrowStack();
|
|
|
|
|
|
|
|
BindBlock(&fallthrough);
|
|
|
|
}
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::GrowStack() {
|
|
|
|
TAG();
|
2017-06-23 10:51:52 +00:00
|
|
|
const Library& lib = Library::Handle(Library::InternalLibrary());
|
|
|
|
const Function& grow_function = Function::ZoneHandle(
|
|
|
|
Z, lib.LookupFunctionAllowPrivate(Symbols::GrowRegExpStack()));
|
2017-09-28 19:43:32 +00:00
|
|
|
StoreLocal(stack_, Bind(StaticCall(grow_function, PushLocal(stack_),
|
|
|
|
ICData::kStatic)));
|
2017-06-23 10:51:52 +00:00
|
|
|
|
|
|
|
// Note: :stack and stack_array_cell content might diverge because each
|
|
|
|
// instance of :matcher code has its own stack_array_cell embedded into it
|
|
|
|
// as a constant but :stack is a local variable and its value might be
|
|
|
|
// comming from OSR or deoptimization. This means we should never use
|
|
|
|
// stack_array_cell in the body of the :matcher to reload the :stack.
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* stack_cell_push = Bind(new (Z) ConstantInstr(stack_array_cell_));
|
|
|
|
Value* index_push = Bind(Uint64Constant(0));
|
|
|
|
Value* stack_push = PushLocal(stack_);
|
2017-06-23 10:51:52 +00:00
|
|
|
Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
|
|
|
|
stack_cell_push, index_push, stack_push));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sets current_position_ to the value held in register |reg|.
void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
  TAG();
  Value* const saved_position = LoadRegister(reg);
  StoreLocal(current_position_, saved_position);
}
|
|
|
|
|
|
|
|
// Resets the tip of the stack to the value stored in reg.
|
|
|
|
void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
|
|
|
|
TAG();
|
|
|
|
ASSERT(reg < registers_count_);
|
|
|
|
StoreLocal(stack_pointer_, LoadRegister(reg));
|
|
|
|
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// If current_position_ is less than -by, sets it to -by and reloads the
// character before the new position; otherwise leaves the position alone.
void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
  TAG();

  BlockLabel already_past_target;
  const intptr_t target_position = -by;

  // Nothing to do when current_position_ >= -by.
  Definition* const position = LoadLocal(current_position_);
  Definition* const threshold = Int64Constant(target_position);
  BranchOrBacktrack(Comparison(kGTE, position, threshold),
                    &already_past_target);

  Value* const new_position = Bind(Int64Constant(target_position));
  StoreLocal(current_position_, new_position);

  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);

  BindBlock(&already_past_target);
}
|
|
|
|
|
|
|
|
// Stores the constant |to| into register |reg|. Registers below
// saved_registers_count_ are reserved for positions and must not be set
// through this function.
void IRRegExpMacroAssembler::SetRegister(intptr_t reg, intptr_t to) {
  TAG();
  ASSERT(reg >= saved_registers_count_);  // Reserved for positions!
  StoreRegister(reg, to);
}
|
|
|
|
|
|
|
|
bool IRRegExpMacroAssembler::Succeed() {
|
|
|
|
TAG();
|
|
|
|
GoTo(success_block_);
|
|
|
|
return global();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stores (current_position_ + cp_offset) into register |reg|.
void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
    intptr_t reg,
    intptr_t cp_offset) {
  TAG();

  Value* const registers_array = PushLocal(registers_);
  Value* const register_index = PushRegisterIndex(reg);

  Value* const position = PushLocal(current_position_);
  Value* const offset = Bind(Int64Constant(cp_offset));
  Value* const negative_offset = Bind(Add(position, offset));

  // The stored value is the negative offset; these are converted to positive
  // string positions within the success block.
  StoreRegister(registers_array, register_index, negative_offset);
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Clears every register in the inclusive range [reg_from, reg_to].
void IRRegExpMacroAssembler::ClearRegisters(intptr_t reg_from,
                                            intptr_t reg_to) {
  TAG();

  ASSERT(reg_from <= reg_to);

  // In order to clear registers to a final result value of -1, set them to
  // (-1 - string length), the offset of -1 from the end of the string.

  for (intptr_t current = reg_from; current <= reg_to; ++current) {
    Value* const registers_array = PushLocal(registers_);
    Value* const register_index = PushRegisterIndex(current);

    Value* const minus_one = Bind(Int64Constant(-1));
    Value* const string_length = PushLocal(string_param_length_);
    Value* const cleared_value = Bind(Sub(minus_one, string_length));

    StoreRegister(registers_array, register_index, cleared_value);
  }
}
|
|
|
|
|
|
|
|
// Stores the current stack_pointer_ value into register |reg|.
void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
  TAG();

  Value* const registers_array = PushLocal(registers_);
  Value* const register_index = PushRegisterIndex(reg);
  Value* const stack_tip = PushLocal(stack_pointer_);
  StoreRegister(registers_array, register_index, stack_tip);
}
|
|
|
|
|
|
|
|
// Private methods:
|
|
|
|
|
|
|
|
void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
|
|
|
|
BlockLabel* on_outside_input) {
|
|
|
|
TAG();
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
if (cp_offset >= 0) {
|
|
|
|
Definition* curpos_def = LoadLocal(current_position_);
|
|
|
|
Definition* cp_off_def = Int64Constant(-cp_offset);
|
|
|
|
// If (current_position_ < -cp_offset), we are in bounds.
|
|
|
|
// Remember, current_position_ is a negative offset from the string end.
|
|
|
|
|
|
|
|
BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
|
|
|
|
on_outside_input);
|
|
|
|
} else {
|
|
|
|
// We need to see if there's enough characters left in the string to go
|
|
|
|
// back cp_offset characters, so get the normalized position and then
|
|
|
|
// make sure that (normalized_position >= -cp_offset).
|
2020-01-09 01:37:27 +00:00
|
|
|
Value* pos_push = PushLocal(current_position_);
|
|
|
|
Value* len_push = PushLocal(string_param_length_);
|
[VM] Adding regexp lookbehind assertion support.
See https://github.com/tc39/proposal-regexp-lookbehind
for a high-level description of the feature and examples. This is one of the
features requested in https://github.com/dart-lang/sdk/issues/34935.
This work takes the feature as present in the v8 engine and appropriately
merges it into our irregexp fork. Notable changes to the irregexp codebase to
introduce this feature:
-----
We can no longer assume that all matching proceeds forwards, since lookbehind
matching proceeds backwards. Similarly, we cannot assume that we can only be
at the start of a string if we started matching from that point. The direction
of matching must also be taken into consideration when doing bounds checking,
which previously assumed the engine would never attempt to look before the
start of a string.
-----
We may now parse backreferences to captures before the capture they
reference, since we parse regular expressions left to right, but lookbehinds
perform captures as they evaluate the string from right to left. Since
RegExpBackReference objects contain a pointer to their corresponding capture,
this means that we may need to create RegExpCapture objects prior to the
parsing of the corresponding captured subexpression.
Thus, RegExpCapture objects are now only initialized with their index, and the
body is set later when the subexpression is encountered and parsed. This means
any method that operates on the body of a RegExpCapture can no longer be const,
which also affects the rest of the RegExpTree class hierarchy. This also means
that we don't have a valid max_match length for backreferences based off the
capture body, and must assume they can end up being any length.
-----
Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540
Commit-Queue: Stevie Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
|
|
|
BranchOrBacktrack(
|
|
|
|
Comparison(kLT, Add(pos_push, len_push), Uint64Constant(-cp_offset)),
|
|
|
|
on_outside_input);
|
|
|
|
}
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Emits a branch on |comparison|. When the comparison holds, control goes to
// |true_successor|, or to the backtrack block if no label was supplied;
// otherwise execution continues in a freshly bound fall-through block.
// A null |comparison| makes the transfer unconditional.
void IRRegExpMacroAssembler::BranchOrBacktrack(ComparisonInstr* comparison,
                                               BlockLabel* true_successor) {
  const bool has_label = (true_successor != nullptr);

  if (comparison == nullptr) {
    // Nothing to test: jump to the label if there is one, else backtrack.
    if (has_label) {
      GoTo(true_successor);
    } else {
      Backtrack();
    }
    return;
  }

  // Select the block that is entered when the comparison succeeds.
  JoinEntryInstr* taken_block = nullptr;
  if (has_label) {
    true_successor->SetLinked();
    taken_block = true_successor->block();
  } else {
    taken_block = backtrack_block_;
  }
  ASSERT(taken_block != nullptr);

  // A failed comparison continues in a new block.
  BlockLabel not_taken;

  BranchInstr* branch_instr = new (Z) BranchInstr(comparison, GetNextDeoptId());
  TargetEntryInstr* on_true = TargetWithJoinGoto(taken_block);
  TargetEntryInstr* on_false = TargetWithJoinGoto(not_taken.block());
  *branch_instr->true_successor_address() = on_true;
  *branch_instr->false_successor_address() = on_false;

  CloseBlockWith(branch_instr);
  BindBlock(&not_taken);
}
|
|
|
|
|
|
|
|
// Creates a new target entry block, records it in blocks_, gives it a single
// goto to |dst|, and returns the new block.
TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
    JoinEntryInstr* dst) {
  TargetEntryInstr* entry = new (Z)
      TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
  blocks_.Add(entry);

  GotoInstr* jump = new (Z) GotoInstr(dst, GetNextDeoptId());
  entry->AppendInstruction(jump);
  return entry;
}
|
|
|
|
|
|
|
|
// Creates a new indirect entry block, records it in blocks_, gives it a
// single goto to |dst|, and returns the new block.
IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
    JoinEntryInstr* dst) {
  IndirectEntryInstr* entry =
      new (Z) IndirectEntryInstr(block_id_.Alloc(), indirect_id_.Alloc(),
                                 kInvalidTryIndex, GetNextDeoptId());
  blocks_.Add(entry);

  GotoInstr* jump = new (Z) GotoInstr(dst, GetNextDeoptId());
  entry->AppendInstruction(jump);
  return entry;
}
|
|
|
|
|
2017-06-23 10:51:52 +00:00
|
|
|
void IRRegExpMacroAssembler::CheckPreemption(bool is_backtrack) {
|
2015-07-07 21:43:32 +00:00
|
|
|
TAG();
|
2017-06-23 10:51:52 +00:00
|
|
|
|
|
|
|
// We don't have the loop_depth available when compiling regexps, but
|
|
|
|
// we set loop_depth to a non-zero value because this instruction does
|
|
|
|
// not act as an OSR entry outside loops.
|
|
|
|
AppendInstruction(new (Z) CheckStackOverflowInstr(
|
[vm/compiler] Copy inlined IDs with token positions.
When creating new instructions that inherit a token position that
represents a source location from another instruction, the inheriting
instruction must also have the same inlining ID in order for the source
position represented by the token position to be looked up in the
correct script.
Force this by wrapping both in a single InstructionSource struct which
is taken by instructions which take token positions instead of just a
token position. That way, it's more work to manually transfer the token
position separately from the inlining ID of the instruction than doing
the right thing of transfering both at once.
To ensure this information is kept consistent, we pass InstructionSource
structs through the FlowGraphCompiler all the way down to the
CodeSourceMapBuilder.
This CL also makes the following changes:
* Cache the upper bound of source positions in scripts and use it to add
a check for if a given real token position is valid for the script
without iterating over the line starts data for each token position.
* Start inlining intervals appropriately when adding descriptor and
null check information to the code source map.
Code size changes are minimal on Flutter gallery in release mode
(<0.05% decrease).
TEST=Existing tests on trybots, with manual checking with
--check-token-positions that previous errors are now removed.
Bug: https://github.com/dart-lang/sdk/issues/44436
Cq-Include-Trybots: luci.dart.try:vm-kernel-precomp-nnbd-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-kernel-nnbd-linux-debug-x64-try,vm-kernel-linux-debug-x64-try,vm-kernel-linux-release-x64-try,vm-kernel-nnbd-linux-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-kernel-precomp-nnbd-linux-release-x64-try,vm-kernel-linux-product-x64-try,vm-kernel-precomp-linux-product-x64-try,vm-kernel-precomp-linux-debug-simarm_x64-try,vm-kernel-precomp-linux-release-simarm64-try,vm-kernel-linux-release-simarm64-try,vm-kernel-linux-release-simarm-try,vm-kernel-linux-release-ia32-try
Change-Id: I23ced262cb4e9fe9d81356f409e7e8d220d63ee0
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/173967
Reviewed-by: Régis Crelier <regis@google.com>
2020-12-18 00:42:14 +00:00
|
|
|
InstructionSource(),
|
2019-04-10 19:40:49 +00:00
|
|
|
/*stack_depth=*/0,
|
2017-06-23 10:51:52 +00:00
|
|
|
/*loop_depth=*/1, GetNextDeoptId(),
|
|
|
|
is_backtrack ? CheckStackOverflowInstr::kOsrAndPreemption
|
|
|
|
: CheckStackOverflowInstr::kOsrOnly));
|
2015-07-07 21:43:32 +00:00
|
|
|
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
// Returns the instance call for Token::kADD applied to |lhs| and |rhs|.
Definition* IRRegExpMacroAssembler::Add(Value* lhs, Value* rhs) {
  const auto& add_descriptor = InstanceCallDescriptor::FromToken(Token::kADD);
  return InstanceCall(add_descriptor, lhs, rhs);
}
|
|
|
|
|
2020-01-09 01:37:27 +00:00
|
|
|
// Returns the instance call for Token::kSUB applied to |lhs| and |rhs|.
Definition* IRRegExpMacroAssembler::Sub(Value* lhs, Value* rhs) {
  const auto& sub_descriptor = InstanceCallDescriptor::FromToken(Token::kSUB);
  return InstanceCall(sub_descriptor, lhs, rhs);
}
|
|
|
|
|
|
|
|
// Loads |characters| code units found |cp_offset| away from the current
// position and stores them in current_character_. No bounds check is emitted
// here.
void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
    intptr_t cp_offset,
    intptr_t characters) {
  TAG();

  ASSERT(characters == 1 || CanReadUnaligned());
  if (mode_ != ASCII) {
    ASSERT(mode_ == UC16);
    ASSERT(characters == 1 || characters == 2);
  } else {
    ASSERT(characters == 1 || characters == 2 || characters == 4);
  }

  // The string index being addressed is:
  //   cp_offset + current_position_ + string_param_length_
  // TODO(zerny): Avoid generating 'add' instance-calls here.
  Value* offset_val = Bind(Int64Constant(cp_offset));
  Value* position_val = BindLoadLocal(*current_position_);
  Value* relative_index = Bind(Add(offset_val, position_val));
  Value* length_val = BindLoadLocal(*string_param_length_);
  Value* absolute_index = Bind(Add(relative_index, length_val));
  // Keep the index in a temporary local so that it can be loaded safely later.
  StoreLocal(index_temp_, absolute_index);

  // Read the code units and record them as the current character.
  Value* loaded_units = LoadCodeUnitsAt(index_temp_, characters);
  StoreLocal(current_character_, loaded_units);
  PRINT(PushLocal(current_character_));
}
|
|
|
|
|
|
|
|
// Loads exactly one code unit at |index| via LoadCodeUnitsAt.
Value* IRRegExpMacroAssembler::CharacterAt(LocalVariable* index) {
  const intptr_t kOneCodeUnit = 1;
  return LoadCodeUnitsAt(index, kOneCodeUnit);
}
|
|
|
|
|
|
|
|
// Emits a LoadCodeUnitsInstr that reads |characters| code units from the
// string parameter at the position held in |index|, and returns the bound
// result.
Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(LocalVariable* index,
                                               intptr_t characters) {
  // The string parameter is the receiver of the load.
  Value* receiver = BindLoadLocal(*string_param_);

  if (IsExternalStringClassId(specialization_cid_)) {
    // External strings keep their data behind one indirection, so the load
    // must go through the external data field.
    intptr_t data_offset = 0;
    switch (specialization_cid_) {
      case kExternalOneByteStringCid:
        data_offset = ExternalOneByteString::external_data_offset();
        break;
      case kExternalTwoByteStringCid:
        data_offset = ExternalTwoByteString::external_data_offset();
        break;
      default:
        UNREACHABLE();
    }
    // The untagged value pushed here is consumed right away by the
    // LoadCodeUnitsInstr created below.
    receiver = Bind(new (Z) LoadUntaggedInstr(receiver, data_offset));
  }

  // |receiver| may be untagged at this point, so this must not trigger a GC.
  Value* index_val = BindLoadLocal(*index);

  LoadCodeUnitsInstr* load = new (Z)
      LoadCodeUnitsInstr(receiver, index_val, characters, specialization_cid_,
                         InstructionSource());
  return Bind(load);
}
|
|
|
|
|
|
|
|
#undef __
|
|
|
|
|
|
|
|
} // namespace dart
|
2017-09-04 11:13:21 +00:00
|
|
|
|
|
|
|
#endif // !defined(DART_PRECOMPILED_RUNTIME)
|