mirror of
https://github.com/dart-lang/sdk
synced 2024-09-16 04:16:51 +00:00
[vm] Weakly cache all RegExp per isolate group, instead of strongly caching 256 RegExp per isolate.
TEST=ci Bug: https://github.com/dart-lang/sdk/issues/51228 Change-Id: Ie2869585ae847ea154460122d7ec5af81ef7697c Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/280521 Commit-Queue: Ryan Macnak <rmacnak@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com> Reviewed-by: Alexander Aprelev <aam@google.com>
This commit is contained in:
parent
85ee8dbf42
commit
09c9301ca0
|
@ -4,11 +4,14 @@
|
|||
|
||||
#include "platform/assert.h"
|
||||
#include "vm/bootstrap_natives.h"
|
||||
#include "vm/canonical_tables.h"
|
||||
#include "vm/exceptions.h"
|
||||
#include "vm/native_entry.h"
|
||||
#include "vm/object.h"
|
||||
#include "vm/object_store.h"
|
||||
#include "vm/regexp_assembler_bytecode.h"
|
||||
#include "vm/regexp_parser.h"
|
||||
#include "vm/reusable_handles.h"
|
||||
#include "vm/thread.h"
|
||||
|
||||
#if !defined(DART_PRECOMPILED_RUNTIME)
|
||||
|
@ -21,34 +24,54 @@ DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 6) {
|
|||
ASSERT(
|
||||
TypeArguments::CheckedHandle(zone, arguments->NativeArgAt(0)).IsNull());
|
||||
GET_NON_NULL_NATIVE_ARGUMENT(String, pattern, arguments->NativeArgAt(1));
|
||||
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_multi_line,
|
||||
arguments->NativeArgAt(2));
|
||||
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_case_sensitive,
|
||||
arguments->NativeArgAt(3));
|
||||
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_unicode,
|
||||
arguments->NativeArgAt(4));
|
||||
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_dot_all,
|
||||
arguments->NativeArgAt(5));
|
||||
bool ignore_case = handle_case_sensitive.ptr() != Bool::True().ptr();
|
||||
bool multi_line = handle_multi_line.ptr() == Bool::True().ptr();
|
||||
bool unicode = handle_unicode.ptr() == Bool::True().ptr();
|
||||
bool dot_all = handle_dot_all.ptr() == Bool::True().ptr();
|
||||
|
||||
bool multi_line = arguments->NativeArgAt(2) == Bool::True().ptr();
|
||||
bool ignore_case = arguments->NativeArgAt(3) != Bool::True().ptr();
|
||||
bool unicode = arguments->NativeArgAt(4) == Bool::True().ptr();
|
||||
bool dot_all = arguments->NativeArgAt(5) == Bool::True().ptr();
|
||||
|
||||
RegExpFlags flags;
|
||||
|
||||
flags.SetGlobal(); // All dart regexps are global.
|
||||
if (ignore_case) flags.SetIgnoreCase();
|
||||
if (multi_line) flags.SetMultiLine();
|
||||
if (unicode) flags.SetUnicode();
|
||||
if (dot_all) flags.SetDotAll();
|
||||
|
||||
RegExpKey lookup_key(pattern, flags);
|
||||
RegExp& regexp = RegExp::Handle(thread->zone());
|
||||
{
|
||||
REUSABLE_OBJECT_HANDLESCOPE(thread);
|
||||
REUSABLE_SMI_HANDLESCOPE(thread);
|
||||
REUSABLE_WEAK_ARRAY_HANDLESCOPE(thread);
|
||||
Object& key = thread->ObjectHandle();
|
||||
Smi& value = thread->SmiHandle();
|
||||
WeakArray& data = thread->WeakArrayHandle();
|
||||
data = thread->isolate_group()->object_store()->regexp_table();
|
||||
CanonicalRegExpSet table(&key, &value, &data);
|
||||
regexp ^= table.GetOrNull(lookup_key);
|
||||
table.Release();
|
||||
if (!regexp.IsNull()) {
|
||||
return regexp.ptr();
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the pattern once in order to throw any format exceptions within
|
||||
// the factory constructor. It is parsed again upon compilation.
|
||||
RegExpCompileData compileData;
|
||||
// Throws an exception on parsing failure.
|
||||
RegExpParser::ParseRegExp(pattern, flags, &compileData);
|
||||
|
||||
// Create a RegExp object containing only the initial parameters.
|
||||
return RegExpEngine::CreateRegExp(thread, pattern, flags);
|
||||
{
|
||||
SafepointMutexLocker ml(thread->isolate_group()->symbols_mutex());
|
||||
CanonicalRegExpSet table(
|
||||
thread->zone(),
|
||||
thread->isolate_group()->object_store()->regexp_table());
|
||||
regexp ^= table.InsertNewOrGet(lookup_key);
|
||||
thread->isolate_group()->object_store()->set_regexp_table(table.Release());
|
||||
}
|
||||
|
||||
ASSERT(regexp.flags() == flags);
|
||||
return regexp.ptr();
|
||||
}
|
||||
|
||||
DEFINE_NATIVE_ENTRY(RegExp_getPattern, 0, 1) {
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
#include "vm/canonical_tables.h"
|
||||
|
||||
#include "vm/regexp.h"
|
||||
|
||||
namespace dart {
|
||||
|
||||
bool MetadataMapTraits::IsMatch(const Object& a, const Object& b) {
|
||||
|
@ -111,4 +113,9 @@ ObjectPtr CanonicalInstanceTraits::NewKey(const CanonicalInstanceKey& obj) {
|
|||
return obj.key_.ptr();
|
||||
}
|
||||
|
||||
// Builds the RegExp object for |key| by handing the key's pattern and flags
// to RegExpEngine::CreateRegExp on the current thread.
ObjectPtr CanonicalRegExpTraits::NewKey(const RegExpKey& key) {
  Thread* const current_thread = Thread::Current();
  return RegExpEngine::CreateRegExp(current_thread, key.pattern_, key.flags_);
}
|
||||
|
||||
} // namespace dart
|
||||
|
|
|
@ -430,6 +430,47 @@ struct CanonicalFfiCallbackFunctionTraits {
|
|||
using FfiCallbackFunctionSet =
|
||||
UnorderedHashSet<CanonicalFfiCallbackFunctionTraits>;
|
||||
|
||||
class RegExpKey {
|
||||
public:
|
||||
RegExpKey(const String& pattern, RegExpFlags flags)
|
||||
: pattern_(pattern), flags_(flags) {}
|
||||
|
||||
bool Equals(const RegExp& other) const {
|
||||
return pattern_.Equals(String::Handle(other.pattern())) &&
|
||||
(flags_ == other.flags());
|
||||
}
|
||||
uword Hash() const {
|
||||
// Must agree with RegExp::CanonicalizeHash.
|
||||
return CombineHashes(pattern_.Hash(), flags_.value());
|
||||
}
|
||||
|
||||
const String& pattern_;
|
||||
RegExpFlags flags_;
|
||||
|
||||
private:
|
||||
DISALLOW_ALLOCATION();
|
||||
};
|
||||
|
||||
class CanonicalRegExpTraits {
|
||||
public:
|
||||
static const char* Name() { return "CanonicalRegExpTraits"; }
|
||||
static bool ReportStats() { return false; }
|
||||
static bool IsMatch(const Object& a, const Object& b) {
|
||||
return RegExp::Cast(a).CanonicalizeEquals(RegExp::Cast(b));
|
||||
}
|
||||
static bool IsMatch(const RegExpKey& a, const Object& b) {
|
||||
return a.Equals(RegExp::Cast(b));
|
||||
}
|
||||
static uword Hash(const Object& key) {
|
||||
return RegExp::Cast(key).CanonicalizeHash();
|
||||
}
|
||||
static uword Hash(const RegExpKey& key) { return key.Hash(); }
|
||||
static ObjectPtr NewKey(const RegExpKey& key);
|
||||
};
|
||||
|
||||
// Hash set of RegExp objects keyed by CanonicalRegExpTraits and stored
// through WeakAcqRelStorageTraits.
using CanonicalRegExpSet =
    UnorderedHashSet<CanonicalRegExpTraits, WeakAcqRelStorageTraits>;
|
||||
|
||||
} // namespace dart
|
||||
|
||||
#endif // RUNTIME_VM_CANONICAL_TABLES_H_
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1732,6 +1732,7 @@ ErrorPtr Object::Init(IsolateGroup* isolate_group,
|
|||
Class& cls = Class::Handle(zone);
|
||||
Type& type = Type::Handle(zone);
|
||||
Array& array = Array::Handle(zone);
|
||||
WeakArray& weak_array = WeakArray::Handle(zone);
|
||||
Library& lib = Library::Handle(zone);
|
||||
TypeArguments& type_args = TypeArguments::Handle(zone);
|
||||
|
||||
|
@ -1761,6 +1762,12 @@ ErrorPtr Object::Init(IsolateGroup* isolate_group,
|
|||
RTN::GrowableObjectArray::type_arguments_offset());
|
||||
cls.set_num_type_arguments_unsafe(1);
|
||||
|
||||
// Initialize hash set for regexp_table_.
|
||||
const intptr_t kInitialCanonicalRegExpSize = 4;
|
||||
weak_array = HashTables::New<CanonicalRegExpSet>(
|
||||
kInitialCanonicalRegExpSize, Heap::kOld);
|
||||
object_store->set_regexp_table(weak_array);
|
||||
|
||||
// Initialize hash set for canonical types.
|
||||
const intptr_t kInitialCanonicalTypeSize = 16;
|
||||
array = HashTables::New<CanonicalTypeSet>(kInitialCanonicalTypeSize,
|
||||
|
@ -27289,6 +27296,11 @@ bool RegExp::CanonicalizeEquals(const Instance& other) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Combines the hash of the pattern string with the flags' integer value.
// Must agree with RegExpKey::Hash.
uint32_t RegExp::CanonicalizeHash() const {
  const auto pattern_hash = String::Hash(pattern());
  const auto flags_value = flags().value();
  return CombineHashes(pattern_hash, flags_value);
}
|
||||
|
||||
const char* RegExp::ToCString() const {
|
||||
const String& str = String::Handle(pattern());
|
||||
return OS::SCreate(Thread::Current()->zone(), "RegExp: pattern=%s flags=%s",
|
||||
|
|
|
@ -12436,8 +12436,12 @@ class RegExpFlags {
|
|||
|
||||
int value() const { return value_; }
|
||||
|
||||
bool operator==(const RegExpFlags& other) { return value_ == other.value_; }
|
||||
bool operator!=(const RegExpFlags& other) { return value_ != other.value_; }
|
||||
// Two flag sets are equal when their underlying integer values are equal.
// Const-qualified so it can be called on const RegExpFlags objects.
bool operator==(const RegExpFlags& other) const {
  const bool same_value = (value_ == other.value_);
  return same_value;
}
|
||||
// Two flag sets differ when their underlying integer values differ.
// Const-qualified so it can be called on const RegExpFlags objects.
bool operator!=(const RegExpFlags& other) const {
  return !(value_ == other.value_);
}
|
||||
|
||||
private:
|
||||
int value_;
|
||||
|
@ -12601,6 +12605,7 @@ class RegExp : public Instance {
|
|||
}
|
||||
|
||||
virtual bool CanonicalizeEquals(const Instance& other) const;
|
||||
virtual uint32_t CanonicalizeHash() const;
|
||||
|
||||
static intptr_t InstanceSize() {
|
||||
return RoundedAllocationSize(sizeof(UntaggedRegExp));
|
||||
|
|
|
@ -141,6 +141,7 @@ class ObjectPointerVisitor;
|
|||
RW(Class, finalizer_entry_class) \
|
||||
RW(Class, native_finalizer_class) \
|
||||
ARW_AR(WeakArray, symbol_table) \
|
||||
ARW_AR(WeakArray, regexp_table) \
|
||||
RW(Array, canonical_types) \
|
||||
RW(Array, canonical_function_types) \
|
||||
RW(Array, canonical_record_types) \
|
||||
|
|
|
@ -337,7 +337,7 @@ template <typename StringType>
|
|||
StringPtr Symbols::NewSymbol(Thread* thread, const StringType& str) {
|
||||
REUSABLE_OBJECT_HANDLESCOPE(thread);
|
||||
REUSABLE_SMI_HANDLESCOPE(thread);
|
||||
REUSABLE_ARRAY_HANDLESCOPE(thread);
|
||||
REUSABLE_WEAK_ARRAY_HANDLESCOPE(thread);
|
||||
String& symbol = String::Handle(thread->zone());
|
||||
dart::Object& key = thread->ObjectHandle();
|
||||
Smi& value = thread->SmiHandle();
|
||||
|
|
|
@ -12,36 +12,11 @@ class RegExp {
|
|||
bool caseSensitive = true,
|
||||
bool unicode = false,
|
||||
bool dotAll = false}) {
|
||||
_RegExpHashKey key =
|
||||
new _RegExpHashKey(source, multiLine, caseSensitive, unicode, dotAll);
|
||||
_RegExpHashValue? value = _cache[key];
|
||||
|
||||
if (value == null) {
|
||||
if (_cache.length > _MAX_CACHE_SIZE) {
|
||||
_RegExpHashKey lastKey = _recentlyUsed.last;
|
||||
_recentlyUsed.remove(lastKey);
|
||||
_cache.remove(lastKey);
|
||||
}
|
||||
|
||||
value = new _RegExpHashValue(
|
||||
new _RegExp(source,
|
||||
multiLine: multiLine,
|
||||
caseSensitive: caseSensitive,
|
||||
unicode: unicode,
|
||||
dotAll: dotAll),
|
||||
key);
|
||||
_cache[key] = value;
|
||||
} else {
|
||||
value.key.unlink();
|
||||
}
|
||||
|
||||
assert(value != null);
|
||||
|
||||
_recentlyUsed.addFirst(value.key);
|
||||
assert(_recentlyUsed.length == _cache.length);
|
||||
|
||||
// TODO(zerny): We might not want to canonicalize regexp objects.
|
||||
return value.regexp;
|
||||
return new _RegExp(source,
|
||||
multiLine: multiLine,
|
||||
caseSensitive: caseSensitive,
|
||||
unicode: unicode,
|
||||
dotAll: dotAll);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -96,57 +71,11 @@ class RegExp {
|
|||
return buffer.toString();
|
||||
}
|
||||
|
||||
// Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE
|
||||
// elements using an LRU eviction strategy.
|
||||
// TODO(zerny): Do not impose a fixed limit on the number of cached objects.
|
||||
// Other possibilities could be limiting by the size of the regexp objects,
|
||||
// or imposing a lower time bound for the most recent use under which a regexp
|
||||
// may not be removed from the cache.
|
||||
// TODO(zerny): Use self-sizing cache similar to _AccessorCache in
|
||||
// mirrors_impl.dart.
|
||||
static const int _MAX_CACHE_SIZE = 256;
|
||||
static final Map<_RegExpHashKey, _RegExpHashValue> _cache =
|
||||
new HashMap<_RegExpHashKey, _RegExpHashValue>();
|
||||
static final LinkedList<_RegExpHashKey> _recentlyUsed =
|
||||
new LinkedList<_RegExpHashKey>();
|
||||
|
||||
int get _groupCount;
|
||||
Iterable<String> get _groupNames;
|
||||
int _groupNameIndex(String name);
|
||||
}
|
||||
|
||||
// Represents both a key in the regular expression cache as well as its
|
||||
// corresponding entry in the LRU list.
|
||||
class _RegExpHashKey extends LinkedListEntry<_RegExpHashKey> {
|
||||
final String pattern;
|
||||
final bool multiLine;
|
||||
final bool caseSensitive;
|
||||
final bool unicode;
|
||||
final bool dotAll;
|
||||
|
||||
_RegExpHashKey(this.pattern, this.multiLine, this.caseSensitive, this.unicode,
|
||||
this.dotAll);
|
||||
|
||||
int get hashCode => pattern.hashCode;
|
||||
bool operator ==(that) {
|
||||
return (that is _RegExpHashKey) &&
|
||||
(this.pattern == that.pattern) &&
|
||||
(this.multiLine == that.multiLine) &&
|
||||
(this.caseSensitive == that.caseSensitive) &&
|
||||
(this.unicode == that.unicode) &&
|
||||
(this.dotAll == that.dotAll);
|
||||
}
|
||||
}
|
||||
|
||||
// Represents a value in the regular expression cache. Contains a pointer
|
||||
// back to the key in order to access the corresponding LRU entry.
|
||||
class _RegExpHashValue {
|
||||
final _RegExp regexp;
|
||||
final _RegExpHashKey key;
|
||||
|
||||
_RegExpHashValue(this.regexp, this.key);
|
||||
}
|
||||
|
||||
class _RegExpMatch implements RegExpMatch {
|
||||
_RegExpMatch._(this._regexp, this.input, this._match);
|
||||
|
||||
|
|
Loading…
Reference in a new issue