mirror of
https://github.com/dart-lang/sdk
synced 2024-11-02 08:07:11 +00:00
[VM] Reland 'Use quadratic probing in hash_table.h'
This was reverted due to https://github.com/dart-lang/sdk/issues/31944, which now looks unrelated.
Originally reviewed at https://dart-review.googlesource.com/c/sdk/+/34760
R=kustermann@google.com
Change-Id: I78d54e9081bbeac36d01a843f4d281a076c00748
Reviewed-on: https://dart-review.googlesource.com/36340
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Erik Corry <erikcorry@google.com>
This commit is contained in:
parent
85b73ddd40
commit
42e2a555be
1 changed file with 59 additions and 33 deletions
|
@ -113,9 +113,12 @@ class HashTable : public ValueObject {
|
|||
// Returns a backing storage size such that 'num_occupied' distinct keys can
|
||||
// be inserted into the table.
|
||||
static intptr_t ArrayLengthForNumOccupied(intptr_t num_occupied) {
|
||||
// The current invariant requires at least one unoccupied entry.
|
||||
// TODO(koda): Adjust if moving to quadratic probing.
|
||||
intptr_t num_entries = num_occupied + 1;
|
||||
// Because we use quadratic (actually triangle number) probing it is
|
||||
// important that the size is a power of two (otherwise we could fail to
|
||||
// find an empty slot). This is described in Knuth's The Art of Computer
|
||||
// Programming Volume 3, Section 6.4, exercise 20 (solution in the
|
||||
// appendix, 2nd edition).
|
||||
intptr_t num_entries = Utils::RoundUpToPowerOfTwo(num_occupied + 1);
|
||||
return kFirstKeyIndex + (kEntrySize * num_entries);
|
||||
}
|
||||
|
||||
|
@ -131,6 +134,7 @@ class HashTable : public ValueObject {
|
|||
data_->SetAt(kNumLT5LookupsIndex, *smi_handle_);
|
||||
data_->SetAt(kNumLT25LookupsIndex, *smi_handle_);
|
||||
data_->SetAt(kNumGT25LookupsIndex, *smi_handle_);
|
||||
data_->SetAt(kNumProbesIndex, *smi_handle_);
|
||||
#endif // !defined(PRODUCT)
|
||||
|
||||
for (intptr_t i = kHeaderSize; i < data_->Length(); ++i) {
|
||||
|
@ -152,8 +156,9 @@ class HashTable : public ValueObject {
|
|||
// TODO(koda): Add salt.
|
||||
NOT_IN_PRODUCT(intptr_t collisions = 0;)
|
||||
uword hash = KeyTraits::Hash(key);
|
||||
intptr_t probe = hash % num_entries;
|
||||
// TODO(koda): Consider quadratic probing.
|
||||
ASSERT(Utils::IsPowerOfTwo(num_entries));
|
||||
intptr_t probe = hash & (num_entries - 1);
|
||||
int probe_distance = 1;
|
||||
while (true) {
|
||||
if (IsUnused(probe)) {
|
||||
NOT_IN_PRODUCT(UpdateCollisions(collisions);)
|
||||
|
@ -166,9 +171,10 @@ class HashTable : public ValueObject {
|
|||
}
|
||||
NOT_IN_PRODUCT(collisions += 1;)
|
||||
}
|
||||
// Advance probe.
|
||||
probe++;
|
||||
probe = (probe == num_entries) ? 0 : probe;
|
||||
// Advance probe. See ArrayLengthForNumOccupied comment for
|
||||
// explanation of how we know this hits all slots.
|
||||
probe = (probe + probe_distance) & (num_entries - 1);
|
||||
probe_distance++;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return -1;
|
||||
|
@ -185,9 +191,10 @@ class HashTable : public ValueObject {
|
|||
ASSERT(NumOccupied() < num_entries);
|
||||
NOT_IN_PRODUCT(intptr_t collisions = 0;)
|
||||
uword hash = KeyTraits::Hash(key);
|
||||
intptr_t probe = hash % num_entries;
|
||||
ASSERT(Utils::IsPowerOfTwo(num_entries));
|
||||
intptr_t probe = hash & (num_entries - 1);
|
||||
int probe_distance = 1;
|
||||
intptr_t deleted = -1;
|
||||
// TODO(koda): Consider quadratic probing.
|
||||
while (true) {
|
||||
if (IsUnused(probe)) {
|
||||
*entry = (deleted != -1) ? deleted : probe;
|
||||
|
@ -206,9 +213,10 @@ class HashTable : public ValueObject {
|
|||
}
|
||||
NOT_IN_PRODUCT(collisions += 1;)
|
||||
}
|
||||
// Advance probe.
|
||||
probe++;
|
||||
probe = (probe == num_entries) ? 0 : probe;
|
||||
// Advance probe. See ArrayLengthForNumOccupied comment for
|
||||
// explanation of how we know this hits all slots.
|
||||
probe = (probe + probe_distance) & (num_entries - 1);
|
||||
probe_distance++;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
|
@ -286,6 +294,7 @@ class HashTable : public ValueObject {
|
|||
intptr_t NumGT25Collisions() const {
|
||||
return GetSmiValueAt(kNumGT25LookupsIndex);
|
||||
}
|
||||
intptr_t NumProbes() const { return GetSmiValueAt(kNumProbesIndex); }
|
||||
void UpdateGrowth() const {
|
||||
if (KeyTraits::ReportStats()) {
|
||||
AdjustSmiValueAt(kNumGrowsIndex, 1);
|
||||
|
@ -296,6 +305,7 @@ class HashTable : public ValueObject {
|
|||
if (data_->raw()->IsVMHeapObject()) {
|
||||
return;
|
||||
}
|
||||
AdjustSmiValueAt(kNumProbesIndex, collisions + 1);
|
||||
if (collisions < 5) {
|
||||
AdjustSmiValueAt(kNumLT5LookupsIndex, 1);
|
||||
} else if (collisions < 25) {
|
||||
|
@ -309,17 +319,21 @@ class HashTable : public ValueObject {
|
|||
if (!KeyTraits::ReportStats()) {
|
||||
return;
|
||||
}
|
||||
const intptr_t num5 = NumLT5Collisions();
|
||||
const intptr_t num25 = NumLT25Collisions();
|
||||
const intptr_t num_more = NumGT25Collisions();
|
||||
// clang-format off
|
||||
OS::Print("Stats for %s table :\n"
|
||||
" Size of table = %" Pd ",Number of Occupied entries = %" Pd "\n"
|
||||
" Number of Grows = %" Pd "\n"
|
||||
" Number of look ups with < 5 collisions = %" Pd "\n"
|
||||
" Number of look ups with < 25 collisions = %" Pd "\n"
|
||||
" Number of look ups with > 25 collisions = %" Pd "\n",
|
||||
" Number of lookups with < 5 collisions = %" Pd "\n"
|
||||
" Number of lookups with < 25 collisions = %" Pd "\n"
|
||||
" Number of lookups with > 25 collisions = %" Pd "\n"
|
||||
" Average number of probes = %g\n",
|
||||
KeyTraits::Name(),
|
||||
NumEntries(), NumOccupied(),
|
||||
NumGrows(),
|
||||
NumLT5Collisions(), NumLT25Collisions(), NumGT25Collisions());
|
||||
NumEntries(), NumOccupied(), NumGrows(),
|
||||
num5, num25, num_more,
|
||||
static_cast<double>(NumProbes()) / (num5 + num25 + num_more));
|
||||
// clang-format on
|
||||
}
|
||||
#endif // !PRODUCT
|
||||
|
@ -334,7 +348,8 @@ class HashTable : public ValueObject {
|
|||
static const intptr_t kNumLT5LookupsIndex = 3;
|
||||
static const intptr_t kNumLT25LookupsIndex = 4;
|
||||
static const intptr_t kNumGT25LookupsIndex = 5;
|
||||
static const intptr_t kHeaderSize = kNumGT25LookupsIndex + 1;
|
||||
static const intptr_t kNumProbesIndex = 6;
|
||||
static const intptr_t kHeaderSize = kNumProbesIndex + 1;
|
||||
#endif
|
||||
static const intptr_t kMetaDataIndex = kHeaderSize;
|
||||
static const intptr_t kFirstKeyIndex = kHeaderSize + kMetaDataSize;
|
||||
|
@ -462,15 +477,28 @@ class HashTables : public AllStatic {
|
|||
}
|
||||
|
||||
template <typename Table>
|
||||
static void EnsureLoadFactor(double low, double high, const Table& table) {
|
||||
double current = (1 + table.NumOccupied() + table.NumDeleted()) /
|
||||
static_cast<double>(table.NumEntries());
|
||||
if (low <= current && current < high) {
|
||||
static void EnsureLoadFactor(double high, const Table& table) {
|
||||
// Deleted slots count toward the load factor because they take up space
|
||||
// just like occupied slots, so an excess of them must also cause a rehash.
|
||||
const double current = (1 + table.NumOccupied() + table.NumDeleted()) /
|
||||
static_cast<double>(table.NumEntries());
|
||||
const bool too_many_deleted = table.NumOccupied() <= table.NumDeleted();
|
||||
if (current < high && !too_many_deleted) {
|
||||
return;
|
||||
}
|
||||
double target = (low + high) / 2.0;
|
||||
intptr_t new_capacity = (1 + table.NumOccupied()) / target;
|
||||
Table new_table(New<Table>(new_capacity,
|
||||
// Normally we double the size here, but if less than half are occupied
|
||||
// then it won't grow (this would imply that there were quite a lot of
|
||||
// deleted slots). We don't want to constantly rehash if we are adding
|
||||
// and deleting entries at just under the load factor limit, so we may
|
||||
// double the size even though the number of occupied slots would not
|
||||
// necessarily justify it. For example if the max load factor is 71% and
|
||||
// the table is 70% full we will double the size to avoid a rehash every
|
||||
// time 1% has been added and deleted.
|
||||
const intptr_t new_capacity = table.NumOccupied() * 2 + 1;
|
||||
ASSERT(table.NumOccupied() == 0 ||
|
||||
((1.0 + table.NumOccupied()) /
|
||||
Utils::RoundUpToPowerOfTwo(new_capacity)) <= high);
|
||||
Table new_table(New<Table>(new_capacity, // Is rounded up to power of 2.
|
||||
table.data_->IsOld() ? Heap::kOld : Heap::kNew));
|
||||
Copy(table, new_table);
|
||||
*table.data_ = new_table.Release().raw();
|
||||
|
@ -579,9 +607,8 @@ class HashMap : public BaseIterTable {
|
|||
|
||||
protected:
|
||||
void EnsureCapacity() const {
|
||||
static const double kMaxLoadFactor = 0.75;
|
||||
// We currently never shrink.
|
||||
HashTables::EnsureLoadFactor(0.0, kMaxLoadFactor, *this);
|
||||
static const double kMaxLoadFactor = 0.71;
|
||||
HashTables::EnsureLoadFactor(kMaxLoadFactor, *this);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -666,9 +693,8 @@ class HashSet : public BaseIterTable {
|
|||
|
||||
protected:
|
||||
void EnsureCapacity() const {
|
||||
static const double kMaxLoadFactor = 0.75;
|
||||
// We currently never shrink.
|
||||
HashTables::EnsureLoadFactor(0.0, kMaxLoadFactor, *this);
|
||||
static const double kMaxLoadFactor = 0.71;
|
||||
HashTables::EnsureLoadFactor(kMaxLoadFactor, *this);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue