[vm] Weakly cache all RegExp per isolate group, instead of strongly caching 256 RegExp per isolate.

TEST=ci
Bug: https://github.com/dart-lang/sdk/issues/51228
Change-Id: Ie2869585ae847ea154460122d7ec5af81ef7697c
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/280521
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
Reviewed-by: Alexander Aprelev <aam@google.com>
This commit is contained in:
Ryan Macnak 2023-02-06 21:49:07 +00:00 committed by Commit Queue
parent 85ee8dbf42
commit 09c9301ca0
9 changed files with 472 additions and 454 deletions

View file

@ -4,11 +4,14 @@
#include "platform/assert.h"
#include "vm/bootstrap_natives.h"
#include "vm/canonical_tables.h"
#include "vm/exceptions.h"
#include "vm/native_entry.h"
#include "vm/object.h"
#include "vm/object_store.h"
#include "vm/regexp_assembler_bytecode.h"
#include "vm/regexp_parser.h"
#include "vm/reusable_handles.h"
#include "vm/thread.h"
#if !defined(DART_PRECOMPILED_RUNTIME)
@ -21,34 +24,54 @@ DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 6) {
ASSERT(
TypeArguments::CheckedHandle(zone, arguments->NativeArgAt(0)).IsNull());
GET_NON_NULL_NATIVE_ARGUMENT(String, pattern, arguments->NativeArgAt(1));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_multi_line,
arguments->NativeArgAt(2));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_case_sensitive,
arguments->NativeArgAt(3));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_unicode,
arguments->NativeArgAt(4));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_dot_all,
arguments->NativeArgAt(5));
bool ignore_case = handle_case_sensitive.ptr() != Bool::True().ptr();
bool multi_line = handle_multi_line.ptr() == Bool::True().ptr();
bool unicode = handle_unicode.ptr() == Bool::True().ptr();
bool dot_all = handle_dot_all.ptr() == Bool::True().ptr();
bool multi_line = arguments->NativeArgAt(2) == Bool::True().ptr();
bool ignore_case = arguments->NativeArgAt(3) != Bool::True().ptr();
bool unicode = arguments->NativeArgAt(4) == Bool::True().ptr();
bool dot_all = arguments->NativeArgAt(5) == Bool::True().ptr();
RegExpFlags flags;
flags.SetGlobal(); // All dart regexps are global.
if (ignore_case) flags.SetIgnoreCase();
if (multi_line) flags.SetMultiLine();
if (unicode) flags.SetUnicode();
if (dot_all) flags.SetDotAll();
RegExpKey lookup_key(pattern, flags);
RegExp& regexp = RegExp::Handle(thread->zone());
{
REUSABLE_OBJECT_HANDLESCOPE(thread);
REUSABLE_SMI_HANDLESCOPE(thread);
REUSABLE_WEAK_ARRAY_HANDLESCOPE(thread);
Object& key = thread->ObjectHandle();
Smi& value = thread->SmiHandle();
WeakArray& data = thread->WeakArrayHandle();
data = thread->isolate_group()->object_store()->regexp_table();
CanonicalRegExpSet table(&key, &value, &data);
regexp ^= table.GetOrNull(lookup_key);
table.Release();
if (!regexp.IsNull()) {
return regexp.ptr();
}
}
// Parse the pattern once in order to throw any format exceptions within
// the factory constructor. It is parsed again upon compilation.
RegExpCompileData compileData;
// Throws an exception on parsing failure.
RegExpParser::ParseRegExp(pattern, flags, &compileData);
// Create a RegExp object containing only the initial parameters.
return RegExpEngine::CreateRegExp(thread, pattern, flags);
{
SafepointMutexLocker ml(thread->isolate_group()->symbols_mutex());
CanonicalRegExpSet table(
thread->zone(),
thread->isolate_group()->object_store()->regexp_table());
regexp ^= table.InsertNewOrGet(lookup_key);
thread->isolate_group()->object_store()->set_regexp_table(table.Release());
}
ASSERT(regexp.flags() == flags);
return regexp.ptr();
}
DEFINE_NATIVE_ENTRY(RegExp_getPattern, 0, 1) {

View file

@ -4,6 +4,8 @@
#include "vm/canonical_tables.h"
#include "vm/regexp.h"
namespace dart {
bool MetadataMapTraits::IsMatch(const Object& a, const Object& b) {
@ -111,4 +113,9 @@ ObjectPtr CanonicalInstanceTraits::NewKey(const CanonicalInstanceKey& obj) {
return obj.key_.ptr();
}
// Creates a RegExp from the pattern and flags carried by `key`, on the
// current thread, and returns it as the object to store for that key.
ObjectPtr CanonicalRegExpTraits::NewKey(const RegExpKey& key) {
  Thread* const thread = Thread::Current();
  return RegExpEngine::CreateRegExp(thread, key.pattern_, key.flags_);
}
} // namespace dart

View file

@ -430,6 +430,47 @@ struct CanonicalFfiCallbackFunctionTraits {
using FfiCallbackFunctionSet =
UnorderedHashSet<CanonicalFfiCallbackFunctionTraits>;
class RegExpKey {
public:
RegExpKey(const String& pattern, RegExpFlags flags)
: pattern_(pattern), flags_(flags) {}
bool Equals(const RegExp& other) const {
return pattern_.Equals(String::Handle(other.pattern())) &&
(flags_ == other.flags());
}
uword Hash() const {
// Must agree with RegExp::CanonicalizeHash.
return CombineHashes(pattern_.Hash(), flags_.value());
}
const String& pattern_;
RegExpFlags flags_;
private:
DISALLOW_ALLOCATION();
};
class CanonicalRegExpTraits {
public:
static const char* Name() { return "CanonicalRegExpTraits"; }
static bool ReportStats() { return false; }
static bool IsMatch(const Object& a, const Object& b) {
return RegExp::Cast(a).CanonicalizeEquals(RegExp::Cast(b));
}
static bool IsMatch(const RegExpKey& a, const Object& b) {
return a.Equals(RegExp::Cast(b));
}
static uword Hash(const Object& key) {
return RegExp::Cast(key).CanonicalizeHash();
}
static uword Hash(const RegExpKey& key) { return key.Hash(); }
static ObjectPtr NewKey(const RegExpKey& key);
};
// Unordered hash set of RegExp objects using CanonicalRegExpTraits and
// WeakAcqRelStorageTraits.
using CanonicalRegExpSet =
    UnorderedHashSet<CanonicalRegExpTraits, WeakAcqRelStorageTraits>;
} // namespace dart
#endif // RUNTIME_VM_CANONICAL_TABLES_H_

File diff suppressed because it is too large Load diff

View file

@ -1732,6 +1732,7 @@ ErrorPtr Object::Init(IsolateGroup* isolate_group,
Class& cls = Class::Handle(zone);
Type& type = Type::Handle(zone);
Array& array = Array::Handle(zone);
WeakArray& weak_array = WeakArray::Handle(zone);
Library& lib = Library::Handle(zone);
TypeArguments& type_args = TypeArguments::Handle(zone);
@ -1761,6 +1762,12 @@ ErrorPtr Object::Init(IsolateGroup* isolate_group,
RTN::GrowableObjectArray::type_arguments_offset());
cls.set_num_type_arguments_unsafe(1);
// Initialize hash set for regexp_table_.
const intptr_t kInitialCanonicalRegExpSize = 4;
weak_array = HashTables::New<CanonicalRegExpSet>(
kInitialCanonicalRegExpSize, Heap::kOld);
object_store->set_regexp_table(weak_array);
// Initialize hash set for canonical types.
const intptr_t kInitialCanonicalTypeSize = 16;
array = HashTables::New<CanonicalTypeSet>(kInitialCanonicalTypeSize,
@ -27289,6 +27296,11 @@ bool RegExp::CanonicalizeEquals(const Instance& other) const {
return true;
}
// Hash derived from the pattern string's hash and the flag bits.
// Must agree with RegExpKey::Hash.
uint32_t RegExp::CanonicalizeHash() const {
  const auto pattern_hash = String::Hash(pattern());
  return CombineHashes(pattern_hash, flags().value());
}
const char* RegExp::ToCString() const {
const String& str = String::Handle(pattern());
return OS::SCreate(Thread::Current()->zone(), "RegExp: pattern=%s flags=%s",

View file

@ -12436,8 +12436,12 @@ class RegExpFlags {
int value() const { return value_; }
bool operator==(const RegExpFlags& other) { return value_ == other.value_; }
bool operator!=(const RegExpFlags& other) { return value_ != other.value_; }
bool operator==(const RegExpFlags& other) const {
return value_ == other.value_;
}
bool operator!=(const RegExpFlags& other) const {
return value_ != other.value_;
}
private:
int value_;
@ -12601,6 +12605,7 @@ class RegExp : public Instance {
}
virtual bool CanonicalizeEquals(const Instance& other) const;
virtual uint32_t CanonicalizeHash() const;
static intptr_t InstanceSize() {
return RoundedAllocationSize(sizeof(UntaggedRegExp));

View file

@ -141,6 +141,7 @@ class ObjectPointerVisitor;
RW(Class, finalizer_entry_class) \
RW(Class, native_finalizer_class) \
ARW_AR(WeakArray, symbol_table) \
ARW_AR(WeakArray, regexp_table) \
RW(Array, canonical_types) \
RW(Array, canonical_function_types) \
RW(Array, canonical_record_types) \

View file

@ -337,7 +337,7 @@ template <typename StringType>
StringPtr Symbols::NewSymbol(Thread* thread, const StringType& str) {
REUSABLE_OBJECT_HANDLESCOPE(thread);
REUSABLE_SMI_HANDLESCOPE(thread);
REUSABLE_ARRAY_HANDLESCOPE(thread);
REUSABLE_WEAK_ARRAY_HANDLESCOPE(thread);
String& symbol = String::Handle(thread->zone());
dart::Object& key = thread->ObjectHandle();
Smi& value = thread->SmiHandle();

View file

@ -12,36 +12,11 @@ class RegExp {
bool caseSensitive = true,
bool unicode = false,
bool dotAll = false}) {
_RegExpHashKey key =
new _RegExpHashKey(source, multiLine, caseSensitive, unicode, dotAll);
_RegExpHashValue? value = _cache[key];
if (value == null) {
if (_cache.length > _MAX_CACHE_SIZE) {
_RegExpHashKey lastKey = _recentlyUsed.last;
_recentlyUsed.remove(lastKey);
_cache.remove(lastKey);
}
value = new _RegExpHashValue(
new _RegExp(source,
multiLine: multiLine,
caseSensitive: caseSensitive,
unicode: unicode,
dotAll: dotAll),
key);
_cache[key] = value;
} else {
value.key.unlink();
}
assert(value != null);
_recentlyUsed.addFirst(value.key);
assert(_recentlyUsed.length == _cache.length);
// TODO(zerny): We might not want to canonicalize regexp objects.
return value.regexp;
return new _RegExp(source,
multiLine: multiLine,
caseSensitive: caseSensitive,
unicode: unicode,
dotAll: dotAll);
}
/**
@ -96,57 +71,11 @@ class RegExp {
return buffer.toString();
}
// Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE
// elements using an LRU eviction strategy.
// TODO(zerny): Do not impose a fixed limit on the number of cached objects.
// Other possibilities could be limiting by the size of the regexp objects,
// or imposing a lower time bound for the most recent use under which a regexp
// may not be removed from the cache.
// TODO(zerny): Use self-sizing cache similar to _AccessorCache in
// mirrors_impl.dart.
static const int _MAX_CACHE_SIZE = 256;
static final Map<_RegExpHashKey, _RegExpHashValue> _cache =
new HashMap<_RegExpHashKey, _RegExpHashValue>();
static final LinkedList<_RegExpHashKey> _recentlyUsed =
new LinkedList<_RegExpHashKey>();
int get _groupCount;
Iterable<String> get _groupNames;
int _groupNameIndex(String name);
}
// Key of the regular expression cache. Because it extends LinkedListEntry,
// the same object also serves as the entry in the LRU list.
class _RegExpHashKey extends LinkedListEntry<_RegExpHashKey> {
  final String pattern;
  final bool multiLine;
  final bool caseSensitive;
  final bool unicode;
  final bool dotAll;

  _RegExpHashKey(this.pattern, this.multiLine, this.caseSensitive, this.unicode,
      this.dotAll);

  // Only the pattern contributes to the hash; the flags are compared in ==.
  int get hashCode {
    return pattern.hashCode;
  }

  // Two keys are equal when the pattern and every flag are equal.
  bool operator ==(that) {
    if (that is! _RegExpHashKey) return false;
    if (!(this.pattern == that.pattern)) return false;
    if (!(this.multiLine == that.multiLine)) return false;
    if (!(this.caseSensitive == that.caseSensitive)) return false;
    if (!(this.unicode == that.unicode)) return false;
    return this.dotAll == that.dotAll;
  }
}
// Value of the regular expression cache: the regexp together with the key it
// is stored under, so that the key's LRU entry can be reached from the value.
class _RegExpHashValue {
  final _RegExp regexp;
  final _RegExpHashKey key;

  _RegExpHashValue(_RegExp regexp, _RegExpHashKey key)
      : regexp = regexp,
        key = key;
}
class _RegExpMatch implements RegExpMatch {
_RegExpMatch._(this._regexp, this.input, this._match);