[vm/regexp] Share RegExp objects between isolates, keep isolate-specific state in static members.

Revert "[vm/regexp] Copy, rather than share RegExp objects between isolates." is in patchset 1.

Isolate RegEx-sharing benchmarks show 24-100x improvements.

Bug: https://github.com/dart-lang/sdk/issues/50639
TEST=ci
Change-Id: I87943c9b9928869f7a20052cb0dc71d24ad36087
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/276240
Commit-Queue: Alexander Aprelev <aam@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
This commit is contained in:
Alexander Aprelev 2022-12-20 02:28:04 +00:00 committed by Commit Queue
parent 8ad766159f
commit 438b81c4f2
10 changed files with 82 additions and 159 deletions

View file

@ -70,6 +70,7 @@ final sharableObjects = [
const [1, 2, 3],
const {1: 1, 2: 2, 3: 2},
const {1, 2, 3},
RegExp('a'),
Isolate.current.pauseCapability,
Int32x4(1, 2, 3, 4),
Float32x4(1.0, 2.0, 3.0, 4.0),

View file

@ -71,6 +71,7 @@ final sharableObjects = [
const [1, 2, 3],
const {1: 1, 2: 2, 3: 2},
const {1, 2, 3},
RegExp('a'),
Isolate.current.pauseCapability,
Int32x4(1, 2, 3, 4),
Float32x4(1.0, 2.0, 3.0, 4.0),

View file

@ -1105,20 +1105,14 @@ ConstantInstr::ConstantInstr(const Object& value,
if (!value.IsNull() && !value.IsSmi() && value.IsInstance() &&
!value.IsCanonical() && (value.ptr() != Object::sentinel().ptr())) {
// Arrays in ConstantInstrs are usually immutable and canonicalized, but
// there are at least a couple of cases where one or both is not true:
// the Arrays created as backing for ArgumentsDescriptors may not be
// canonicalized for space reasons when inlined in the IL. However, they
// are still immutable.
//
// * The Arrays created as backing for ArgumentsDescriptors may not be
// canonicalized for space reasons when inlined in the IL. However, they
// are still immutable.
// * The backtracking stack for IRRegExps is put into a ConstantInstr for
// immediate use as an argument to the operations on that stack. In this
// case, the Array representing it is neither immutable nor canonical.
//
// In addition to complicating the story for Arrays, IRRegExp compilation
// also uses other non-canonical values as "constants". For example, the bit
// tables used for certain character classes are represented as TypedData,
// and so those values are also neither immutable (as there are no immutable
// TypedData values) or canonical.
// IRRegExp compilation uses non-canonical TypedData values as "constants".
// Specifically, the bit tables used for certain character classes are
// represented as TypedData, and so those values are also neither immutable
// (as there are no immutable TypedData values) nor canonical.
//
// LibraryPrefixes are also never canonicalized since their equality is
// their identity.

View file

@ -10,7 +10,6 @@
#include "vm/longjump.h"
#include "vm/object.h"
#include "vm/object_store.h"
#include "vm/regexp.h"
#include "vm/snapshot.h"
#include "vm/symbols.h"
#include "vm/timeline.h"
@ -69,6 +68,7 @@
V(Pointer) \
V(ReceivePort) \
V(RecordType) \
V(RegExp) \
V(Script) \
V(Sentinel) \
V(SendPort) \
@ -165,13 +165,7 @@ static bool CanShareObject(ObjectPtr obj, uword tags) {
}
if (cid == kSendPortCid) return true;
if (cid == kCapabilityCid) return true;
// Generated code for regexp can't be shared.
#if defined(DART_PRECOMPILED_RUNTIME)
if (cid == kRegExpCid) return true;
#else
if (FLAG_interpret_irregexp && cid == kRegExpCid) return true;
#endif
if (cid == kClosureCid) {
// We can share a closure iff it doesn't close over any state.
@ -215,11 +209,7 @@ static bool MightNeedReHashing(ObjectPtr object) {
// a map or a value in a set, they will already have the identity hash code
// set.
if (cid == kImmutableArrayCid) return false;
#if defined(DART_PRECOMPILED_RUNTIME)
if (cid == kRegExpCid) return false;
#else
if (FLAG_interpret_irregexp && cid == kRegExpCid) return false;
#endif
if (cid == kInt32x4Cid) return false;
// If the [tags] indicates this is a canonical object we'll share it instead
@ -659,7 +649,6 @@ class FastForwardMap : public ForwardMapBase {
// Appends the external typed data `to` to raw_external_typed_data_to_.
void AddExternalTypedData(ExternalTypedDataPtr to) {
raw_external_typed_data_to_.Add(to);
}
// Appends the RegExp `to` to raw_reg_exp_to_.
void AddRegExp(RegExpPtr to) { raw_reg_exp_to_.Add(to); }
// Appends the object `to` to raw_objects_to_rehash_.
void AddObjectToRehash(ObjectPtr to) { raw_objects_to_rehash_.Add(to); }
// Appends the object `to` to raw_expandos_to_rehash_.
void AddExpandoToRehash(ObjectPtr to) { raw_expandos_to_rehash_.Add(to); }
@ -671,7 +660,6 @@ class FastForwardMap : public ForwardMapBase {
IdentityMap* map_;
GrowableArray<ObjectPtr> raw_from_to_;
GrowableArray<TransferableTypedDataPtr> raw_transferables_from_to_;
GrowableArray<RegExpPtr> raw_reg_exp_to_;
GrowableArray<ExternalTypedDataPtr> raw_external_typed_data_to_;
GrowableArray<ObjectPtr> raw_objects_to_rehash_;
GrowableArray<ObjectPtr> raw_expandos_to_rehash_;
@ -714,7 +702,6 @@ class SlowForwardMap : public ForwardMapBase {
transferables_from_to_.Add(&TransferableTypedData::Handle(from.ptr()));
transferables_from_to_.Add(&TransferableTypedData::Handle(to.ptr()));
}
// Creates a new RegExp handle for `to` and records a pointer to that handle
// in reg_exps_.
void AddRegExp(const RegExp& to) {
  auto& regexp_handle = RegExp::Handle(to.ptr());
  reg_exps_.Add(&regexp_handle);
}
// Creates a new WeakProperty handle for `from` and records a pointer to that
// handle in weak_properties_.
void AddWeakProperty(const WeakProperty& from) {
  auto& weak_property_handle = WeakProperty::Handle(from.ptr());
  weak_properties_.Add(&weak_property_handle);
}
@ -741,29 +728,6 @@ class SlowForwardMap : public ForwardMapBase {
}
}
void FinalizeRegExps() {
if (FLAG_interpret_irregexp) {
return;
}
if (reg_exps_.length() == 0) {
return;
}
const Library& lib = Library::Handle(zone_, Library::CoreLibrary());
const Class& owner =
Class::Handle(zone_, lib.LookupClass(Symbols::RegExp()));
for (intptr_t i = 0; i < reg_exps_.length(); i++) {
auto regexp = reg_exps_[i];
for (intptr_t cid = kOneByteStringCid; cid <= kExternalTwoByteStringCid;
cid++) {
CreateSpecializedFunction(thread_, zone_, *regexp, cid,
/*sticky=*/false, owner);
CreateSpecializedFunction(thread_, zone_, *regexp, cid,
/*sticky=*/true, owner);
}
}
}
void FinalizeExternalTypedData() {
for (intptr_t i = 0; i < external_typed_data_.length(); i++) {
auto to = external_typed_data_[i];
@ -780,7 +744,6 @@ class SlowForwardMap : public ForwardMapBase {
GrowableArray<const PassiveObject*> from_to_transition_;
GrowableObjectArray& from_to_;
GrowableArray<const TransferableTypedData*> transferables_from_to_;
GrowableArray<const RegExp*> reg_exps_;
GrowableArray<const ExternalTypedData*> external_typed_data_;
GrowableArray<const Object*> objects_to_rehash_;
GrowableArray<const Object*> expandos_to_rehash_;
@ -1055,7 +1018,6 @@ class FastObjectCopyBase : public ObjectCopyBase {
TransferableTypedDataPtr to) {
fast_forward_map_.AddTransferable(from, to);
}
// Forwards the RegExp `to` to fast_forward_map_.AddRegExp.
void EnqueueRegExp(RegExpPtr to) { fast_forward_map_.AddRegExp(to); }
// Forwards the weak property `from` to fast_forward_map_.AddWeakProperty.
void EnqueueWeakProperty(WeakPropertyPtr from) {
fast_forward_map_.AddWeakProperty(from);
}
@ -1261,7 +1223,6 @@ class SlowObjectCopyBase : public ObjectCopyBase {
const TransferableTypedData& to) {
slow_forward_map_.AddTransferable(from, to);
}
// Forwards the RegExp `to` to slow_forward_map_.AddRegExp.
void EnqueueRegExp(const RegExp& to) { slow_forward_map_.AddRegExp(to); }
// Forwards the weak property `from` to slow_forward_map_.AddWeakProperty.
void EnqueueWeakProperty(const WeakProperty& from) {
slow_forward_map_.AddWeakProperty(from);
}
@ -1474,49 +1435,6 @@ class ObjectCopy : public Base {
Record::field_offset(0) + Record::kBytesPerElement * num_fields);
}
// Copies the RegExp `from` into the freshly allocated `to`.
//
// * Calls Base::StoreCompressedPointers with the offsets of
//   capture_name_map_ and pattern_ to transfer those pointer fields.
// * Copies the scalar fields (bracket expression count, one-/two-byte
//   register counts and type flags) verbatim.
// * Stores null into each of the eight matcher slots (one_byte_, two_byte_,
//   external_one_byte_, external_two_byte_ and their *_sticky_ variants)
//   exactly once instead of copying them from `from`.
// * Enqueues `to` through Base::EnqueueRegExp.
//   NOTE(review): presumably so the nulled slots can be repopulated after the
//   copy completes - confirm against the forward map's finalization step.
void CopyRegExp(typename Types::RegExp from, typename Types::RegExp to) {
  Base::StoreCompressedPointers(from, to,
                                OFFSET_OF(UntaggedRegExp, capture_name_map_),
                                OFFSET_OF(UntaggedRegExp, pattern_));

  UntagRegExp(to)->num_bracket_expressions_ =
      UntagRegExp(from)->num_bracket_expressions_;
  UntagRegExp(to)->num_one_byte_registers_ =
      UntagRegExp(from)->num_one_byte_registers_;
  UntagRegExp(to)->num_two_byte_registers_ =
      UntagRegExp(from)->num_two_byte_registers_;
  UntagRegExp(to)->type_flags_ = UntagRegExp(from)->type_flags_;

  // Non-sticky matcher slots.
  Base::StoreCompressedPointerNoBarrier(Types::GetRegExpPtr(to),
                                        OFFSET_OF(UntaggedRegExp, one_byte_),
                                        Object::null());
  Base::StoreCompressedPointerNoBarrier(Types::GetRegExpPtr(to),
                                        OFFSET_OF(UntaggedRegExp, two_byte_),
                                        Object::null());
  Base::StoreCompressedPointerNoBarrier(
      Types::GetRegExpPtr(to), OFFSET_OF(UntaggedRegExp, external_one_byte_),
      Object::null());
  Base::StoreCompressedPointerNoBarrier(
      Types::GetRegExpPtr(to), OFFSET_OF(UntaggedRegExp, external_two_byte_),
      Object::null());

  // Sticky matcher slots.
  Base::StoreCompressedPointerNoBarrier(
      Types::GetRegExpPtr(to), OFFSET_OF(UntaggedRegExp, one_byte_sticky_),
      Object::null());
  Base::StoreCompressedPointerNoBarrier(
      Types::GetRegExpPtr(to), OFFSET_OF(UntaggedRegExp, two_byte_sticky_),
      Object::null());
  Base::StoreCompressedPointerNoBarrier(
      Types::GetRegExpPtr(to),
      OFFSET_OF(UntaggedRegExp, external_one_byte_sticky_), Object::null());
  Base::StoreCompressedPointerNoBarrier(
      Types::GetRegExpPtr(to),
      OFFSET_OF(UntaggedRegExp, external_two_byte_sticky_), Object::null());

  Base::EnqueueRegExp(to);
}
template <intptr_t one_for_set_two_for_map, typename T>
void CopyLinkedHashBase(T from,
T to,
@ -2136,7 +2054,6 @@ class ObjectGraphCopier : public StackResource {
// The copy was successful, then detach transferable data from the sender
// and attach to the copied graph.
slow_object_copy_.slow_forward_map_.FinalizeTransferables();
slow_object_copy_.slow_forward_map_.FinalizeRegExps();
return result.ptr();
}
@ -2182,7 +2099,6 @@ class ObjectGraphCopier : public StackResource {
result_array.SetAt(2, fast_object_copy_.tmp_);
HandlifyExternalTypedData();
HandlifyTransferables();
HandlifyRegExp();
allocated_bytes_ =
fast_object_copy_.fast_forward_map_.allocated_bytes;
copied_objects_ =
@ -2244,7 +2160,6 @@ class ObjectGraphCopier : public StackResource {
HandlifyWeakProperties();
HandlifyWeakReferences();
HandlifyExternalTypedData();
HandlifyRegExp();
HandlifyObjectsToReHash();
HandlifyExpandosToReHash();
HandlifyFromToObjects();
@ -2291,10 +2206,6 @@ class ObjectGraphCopier : public StackResource {
Handlify(&fast_object_copy_.fast_forward_map_.raw_external_typed_data_to_,
&slow_object_copy_.slow_forward_map_.external_typed_data_);
}
// Passes the fast forward map's raw_reg_exp_to_ list and the slow forward
// map's reg_exps_ list to Handlify.
void HandlifyRegExp() {
  auto& fast_map = fast_object_copy_.fast_forward_map_;
  auto& slow_map = slow_object_copy_.slow_forward_map_;
  Handlify(&fast_map.raw_reg_exp_to_, &slow_map.reg_exps_);
}
void HandlifyObjectsToReHash() {
Handlify(&fast_object_copy_.fast_forward_map_.raw_objects_to_rehash_,
&slow_object_copy_.slow_forward_map_.objects_to_rehash_);

View file

@ -13,6 +13,7 @@
#include "vm/compiler/backend/il_printer.h"
#include "vm/compiler/frontend/flow_graph_builder.h"
#include "vm/compiler/jit/compiler.h"
#include "vm/compiler/runtime_api.h"
#include "vm/dart_entry.h"
#include "vm/longjump.h"
#include "vm/object_store.h"
@ -31,9 +32,8 @@
}
#define TAG_() \
Print(Bind(new (Z) ConstantInstr(String::ZoneHandle( \
Z, String::Concat(String::Handle(String::New("TAG: ")), \
String::Handle(String::New(__FUNCTION__)), \
Heap::kOld)))));
Z, Symbols::FromConcat(thread_, String::Handle(String::New("TAG: ")), \
String::Handle(String::New(__FUNCTION__)))))));
#define PRINT(arg) \
if (FLAG_trace_irregexp) { \
@ -42,8 +42,6 @@
namespace dart {
static const intptr_t kMinStackSize = 512;
/*
* This assembler uses the following main local variables:
* - stack_: A pointer to a growable list which we use as an all-purpose stack
@ -87,9 +85,6 @@ IRRegExpMacroAssembler::IRRegExpMacroAssembler(
start_index_param_(NULL),
registers_count_(0),
saved_registers_count_((capture_count + 1) * 2),
stack_array_cell_(Array::ZoneHandle(zone, Array::New(1, Heap::kOld))),
// The registers array is allocated at a fixed size after assembly.
registers_array_(TypedData::ZoneHandle(zone, TypedData::null())),
// B0 is taken by GraphEntry thus block ids must start at 1.
block_id_(1) {
switch (specialization_cid) {
@ -107,14 +102,6 @@ IRRegExpMacroAssembler::IRRegExpMacroAssembler(
InitializeLocals();
// Allocate an initial stack backing of the minimum stack size. The stack
// backing is indirectly referred to so we can reuse it on subsequent matches
// even in the case where the backing has been enlarged and thus reallocated.
stack_array_cell_.SetAt(
0,
TypedData::Handle(zone, TypedData::New(kTypedDataInt32ArrayCid,
kMinStackSize / 4, Heap::kOld)));
// Create and generate all preset blocks.
entry_block_ = new (zone) GraphEntryInstr(*parsed_function_, osr_id);
@ -195,16 +182,27 @@ void IRRegExpMacroAssembler::GenerateEntryBlock() {
StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
// Generate a local list variable to represent "registers" and
// initialize capture registers (others remain garbage).
StoreLocal(registers_, Bind(new (Z) ConstantInstr(registers_array_)));
{
const Library& lib = Library::ZoneHandle(Z, Library::CoreLibrary());
const Class& regexp_class =
Class::ZoneHandle(Z, lib.LookupClassAllowPrivate(Symbols::_RegExp()));
const Function& get_registers_function = Function::ZoneHandle(
Z, regexp_class.LookupFunctionAllowPrivate(Symbols::_getRegisters()));
// This will be replaced with correct constant value at Finalization.
num_registers_constant_instr = Int64Constant(0);
StoreLocal(registers_, Bind(StaticCall(get_registers_function,
Bind(num_registers_constant_instr),
ICData::kStatic)));
const Function& get_backtracking_stack_function =
Function::ZoneHandle(Z, regexp_class.LookupFunctionAllowPrivate(
Symbols::_getBacktrackingStack()));
StoreLocal(stack_, Bind(StaticCall(get_backtracking_stack_function,
ICData::kStatic)));
}
ClearRegisters(0, saved_registers_count_ - 1);
// Generate a local list variable to represent the backtracking stack.
Value* stack_cell_push = Bind(new (Z) ConstantInstr(stack_array_cell_));
StoreLocal(stack_,
Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
stack_cell_push, Bind(Uint64Constant(0)))));
StoreLocal(stack_pointer_, Bind(Int64Constant(-1)));
// Jump to the start block.
@ -274,8 +272,10 @@ void IRRegExpMacroAssembler::GenerateExitBlock() {
void IRRegExpMacroAssembler::FinalizeRegistersArray() {
ASSERT(registers_count_ >= saved_registers_count_);
registers_array_ =
TypedData::New(kTypedDataInt32ArrayCid, registers_count_, Heap::kOld);
ConstantInstr* new_constant = Int64Constant(registers_count_);
new_constant->set_temp_index(num_registers_constant_instr->temp_index());
num_registers_constant_instr->ReplaceWith(new_constant, /*iterator=*/nullptr);
}
bool IRRegExpMacroAssembler::CanReadUnaligned() {
@ -1480,22 +1480,15 @@ void IRRegExpMacroAssembler::CheckStackLimit() {
void IRRegExpMacroAssembler::GrowStack() {
TAG();
const Library& lib = Library::Handle(Library::InternalLibrary());
const Function& grow_function = Function::ZoneHandle(
Z, lib.LookupFunctionAllowPrivate(Symbols::GrowRegExpStack()));
StoreLocal(stack_, Bind(StaticCall(grow_function, PushLocal(stack_),
ICData::kStatic)));
const Library& lib = Library::ZoneHandle(Z, Library::CoreLibrary());
const Class& regexp_class =
Class::ZoneHandle(Z, lib.LookupClassAllowPrivate(Symbols::_RegExp()));
// Note: :stack and stack_array_cell content might diverge because each
// instance of :matcher code has its own stack_array_cell embedded into it
// as a constant but :stack is a local variable and its value might be
// coming from OSR or deoptimization. This means we should never use
// stack_array_cell in the body of the :matcher to reload the :stack.
Value* stack_cell_push = Bind(new (Z) ConstantInstr(stack_array_cell_));
Value* index_push = Bind(Uint64Constant(0));
Value* stack_push = PushLocal(stack_);
Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
stack_cell_push, index_push, stack_push));
const Function& grow_backtracking_stack_function =
Function::ZoneHandle(Z, regexp_class.LookupFunctionAllowPrivate(
Symbols::_growBacktrackingStack()));
StoreLocal(stack_, Bind(StaticCall(grow_backtracking_stack_function,
ICData::kStatic)));
}
void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {

View file

@ -430,14 +430,14 @@ class IRRegExpMacroAssembler : public RegExpMacroAssembler {
intptr_t registers_count_;
const intptr_t saved_registers_count_;
// The actual array objects used for the stack and registers.
Array& stack_array_cell_;
TypedData& registers_array_;
IdAllocator block_id_;
IdAllocator temp_id_;
IdAllocator local_id_;
IdAllocator indirect_id_;
// Placeholder instruction holding number of registers in Irregexp entry block
// that is replaced with correct value during code finalization.
ConstantInstr* num_registers_constant_instr = nullptr;
};
} // namespace dart

View file

@ -151,7 +151,6 @@ class ObjectPointerVisitor;
V(GetRuntimeType, "get:runtimeType") \
V(GetterPrefix, "get:") \
V(GreaterEqualOperator, ">=") \
V(GrowRegExpStack, "_growRegExpStack") \
V(HaveSameRuntimeType, "_haveSameRuntimeType") \
V(ICData, "ICData") \
V(Identical, "identical") \
@ -420,6 +419,9 @@ class ObjectPointerVisitor;
V(_current, "_current") \
V(_ensureScheduleImmediate, "_ensureScheduleImmediate") \
V(_future, "_future") \
V(_getBacktrackingStack, "_getBacktrackingStack") \
V(_getRegisters, "_getRegisters") \
V(_growBacktrackingStack, "_growBacktrackingStack") \
V(_handleException, "_handleException") \
V(_handleFinalizerMessage, "_handleFinalizerMessage") \
V(_handleMessage, "_handleMessage") \

View file

@ -58,7 +58,7 @@ import "dart:ffi" show Pointer, Struct, Union, NativePort;
import "dart:isolate" show Isolate, RawReceivePort;
import "dart:typed_data" show Uint8List, Uint16List;
import "dart:typed_data" show Uint8List, Uint16List, Int32List;
/// These are the additional parts of this patch library:
part "array.dart";

View file

@ -146,15 +146,6 @@ external _prependTypeArguments(
@pragma("vm:external-name", "Internal_boundsCheckForPartialInstantiation")
external _boundsCheckForPartialInstantiation(closure, typeArgs);
// Called by IRRegExpMacroAssembler::GrowStack.
//
// Returns a new list twice as long as [stack] whose first `stack.length`
// elements are copied from [stack]. [stack] itself is left untouched.
Int32List _growRegExpStack(Int32List stack) {
  final oldLength = stack.length;
  // setRange copies the existing entries in one library call instead of an
  // element-by-element loop.
  return Int32List(oldLength * 2)..setRange(0, oldLength, stack);
}
@patch
@pragma("vm:external-name", "Internal_unsafeCast")
external T unsafeCast<T>(dynamic v);

View file

@ -365,6 +365,36 @@ class _RegExp implements RegExp {
@pragma("vm:recognized", "asm-intrinsic")
@pragma("vm:external-name", "RegExp_ExecuteMatchSticky")
external List<int>? _ExecuteMatchSticky(String str, int start_index);
// Returns the list held in the static `_registers` field when it exists and
// has at least [registers_count] elements. Otherwise allocates a new
// Int32List of exactly [registers_count] elements, stores it in `_registers`
// and returns it; the previous contents are not carried over.
static Int32List _getRegisters(int registers_count) {
  final cached = _registers;
  if (cached != null && cached.length >= registers_count) {
    return cached;
  }
  final fresh = Int32List(registers_count);
  _registers = fresh;
  return fresh;
}
// Returns the list held in the static `_backtrackingStack` field, allocating
// it with 128 elements on first use.
static Int32List _getBacktrackingStack() {
  const initialSize = 128;
  return _backtrackingStack ??= Int32List(initialSize);
}
// Replaces `_backtrackingStack` with a list twice its current length whose
// leading elements are copied from the old list, and returns the new list.
// Requires `_backtrackingStack` to be non-null (enforced by the `!` below).
//
// TODO: Should we bound this to the same limit used by the irregexp interpreter
// for consistency?
static Int32List _growBacktrackingStack() {
  final stack = _backtrackingStack!;
  // setRange copies the existing entries in one library call instead of an
  // element-by-element loop.
  final newStack = Int32List(stack.length * 2)
    ..setRange(0, stack.length, stack);
  _backtrackingStack = newStack;
  return newStack;
}
static Int32List? _registers;
static Int32List? _backtrackingStack;
}
class _AllMatchesIterable extends IterableBase<RegExpMatch> {