[VM] Reland 'Use quadratic probing in hash_table.h'

This was reverted due to https://github.com/dart-lang/sdk/issues/31944 which
now looks unrelated. Originally reviewed at
https://dart-review.googlesource.com/c/sdk/+/34760

R=kustermann@google.com

Change-Id: I78d54e9081bbeac36d01a843f4d281a076c00748
Reviewed-on: https://dart-review.googlesource.com/36340
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Erik Corry <erikcorry@google.com>
This commit is contained in:
Erik Corry 2018-01-22 22:54:45 +00:00 committed by commit-bot@chromium.org
parent 85b73ddd40
commit 42e2a555be

View file

@ -113,9 +113,12 @@ class HashTable : public ValueObject {
// Returns a backing storage size such that 'num_occupied' distinct keys can
// be inserted into the table.
static intptr_t ArrayLengthForNumOccupied(intptr_t num_occupied) {
// The current invariant requires at least one unoccupied entry.
// TODO(koda): Adjust if moving to quadratic probing.
intptr_t num_entries = num_occupied + 1;
// Because we use quadratic (actually triangle number) probing it is
// important that the size is a power of two (otherwise we could fail to
// find an empty slot). This is described in Knuth's The Art of Computer
// Programming Volume 3, Section 6.4, exercise 20 (solution in the
// appendix, 2nd edition).
intptr_t num_entries = Utils::RoundUpToPowerOfTwo(num_occupied + 1);
return kFirstKeyIndex + (kEntrySize * num_entries);
}
@ -131,6 +134,7 @@ class HashTable : public ValueObject {
data_->SetAt(kNumLT5LookupsIndex, *smi_handle_);
data_->SetAt(kNumLT25LookupsIndex, *smi_handle_);
data_->SetAt(kNumGT25LookupsIndex, *smi_handle_);
data_->SetAt(kNumProbesIndex, *smi_handle_);
#endif // !defined(PRODUCT)
for (intptr_t i = kHeaderSize; i < data_->Length(); ++i) {
@ -152,8 +156,9 @@ class HashTable : public ValueObject {
// TODO(koda): Add salt.
NOT_IN_PRODUCT(intptr_t collisions = 0;)
uword hash = KeyTraits::Hash(key);
intptr_t probe = hash % num_entries;
// TODO(koda): Consider quadratic probing.
ASSERT(Utils::IsPowerOfTwo(num_entries));
intptr_t probe = hash & (num_entries - 1);
int probe_distance = 1;
while (true) {
if (IsUnused(probe)) {
NOT_IN_PRODUCT(UpdateCollisions(collisions);)
@ -166,9 +171,10 @@ class HashTable : public ValueObject {
}
NOT_IN_PRODUCT(collisions += 1;)
}
// Advance probe.
probe++;
probe = (probe == num_entries) ? 0 : probe;
// Advance probe. See ArrayLengthForNumOccupied comment for
// explanation of how we know this hits all slots.
probe = (probe + probe_distance) & (num_entries - 1);
probe_distance++;
}
UNREACHABLE();
return -1;
@ -185,9 +191,10 @@ class HashTable : public ValueObject {
ASSERT(NumOccupied() < num_entries);
NOT_IN_PRODUCT(intptr_t collisions = 0;)
uword hash = KeyTraits::Hash(key);
intptr_t probe = hash % num_entries;
ASSERT(Utils::IsPowerOfTwo(num_entries));
intptr_t probe = hash & (num_entries - 1);
int probe_distance = 1;
intptr_t deleted = -1;
// TODO(koda): Consider quadratic probing.
while (true) {
if (IsUnused(probe)) {
*entry = (deleted != -1) ? deleted : probe;
@ -206,9 +213,10 @@ class HashTable : public ValueObject {
}
NOT_IN_PRODUCT(collisions += 1;)
}
// Advance probe.
probe++;
probe = (probe == num_entries) ? 0 : probe;
// Advance probe. See ArrayLengthForNumOccupied comment for
// explanation of how we know this hits all slots.
probe = (probe + probe_distance) & (num_entries - 1);
probe_distance++;
}
UNREACHABLE();
return false;
@ -286,6 +294,7 @@ class HashTable : public ValueObject {
intptr_t NumGT25Collisions() const {
return GetSmiValueAt(kNumGT25LookupsIndex);
}
intptr_t NumProbes() const { return GetSmiValueAt(kNumProbesIndex); }
void UpdateGrowth() const {
if (KeyTraits::ReportStats()) {
AdjustSmiValueAt(kNumGrowsIndex, 1);
@ -296,6 +305,7 @@ class HashTable : public ValueObject {
if (data_->raw()->IsVMHeapObject()) {
return;
}
AdjustSmiValueAt(kNumProbesIndex, collisions + 1);
if (collisions < 5) {
AdjustSmiValueAt(kNumLT5LookupsIndex, 1);
} else if (collisions < 25) {
@ -309,17 +319,21 @@ class HashTable : public ValueObject {
if (!KeyTraits::ReportStats()) {
return;
}
const intptr_t num5 = NumLT5Collisions();
const intptr_t num25 = NumLT25Collisions();
const intptr_t num_more = NumGT25Collisions();
// clang-format off
OS::Print("Stats for %s table :\n"
" Size of table = %" Pd ",Number of Occupied entries = %" Pd "\n"
" Number of Grows = %" Pd "\n"
" Number of look ups with < 5 collisions = %" Pd "\n"
" Number of look ups with < 25 collisions = %" Pd "\n"
" Number of look ups with > 25 collisions = %" Pd "\n",
" Number of lookups with < 5 collisions = %" Pd "\n"
" Number of lookups with < 25 collisions = %" Pd "\n"
" Number of lookups with > 25 collisions = %" Pd "\n"
" Average number of probes = %g\n",
KeyTraits::Name(),
NumEntries(), NumOccupied(),
NumGrows(),
NumLT5Collisions(), NumLT25Collisions(), NumGT25Collisions());
NumEntries(), NumOccupied(), NumGrows(),
num5, num25, num_more,
static_cast<double>(NumProbes()) / (num5 + num25 + num_more));
// clang-format on
}
#endif // !PRODUCT
@ -334,7 +348,8 @@ class HashTable : public ValueObject {
static const intptr_t kNumLT5LookupsIndex = 3;
static const intptr_t kNumLT25LookupsIndex = 4;
static const intptr_t kNumGT25LookupsIndex = 5;
static const intptr_t kHeaderSize = kNumGT25LookupsIndex + 1;
static const intptr_t kNumProbesIndex = 6;
static const intptr_t kHeaderSize = kNumProbesIndex + 1;
#endif
static const intptr_t kMetaDataIndex = kHeaderSize;
static const intptr_t kFirstKeyIndex = kHeaderSize + kMetaDataSize;
@ -462,15 +477,28 @@ class HashTables : public AllStatic {
}
template <typename Table>
static void EnsureLoadFactor(double low, double high, const Table& table) {
double current = (1 + table.NumOccupied() + table.NumDeleted()) /
static_cast<double>(table.NumEntries());
if (low <= current && current < high) {
static void EnsureLoadFactor(double high, const Table& table) {
// Deleted entries count toward the load factor: they take up space just
// like occupied slots, so they must also be able to trigger a rehash.
const double current = (1 + table.NumOccupied() + table.NumDeleted()) /
static_cast<double>(table.NumEntries());
const bool too_many_deleted = table.NumOccupied() <= table.NumDeleted();
if (current < high && !too_many_deleted) {
return;
}
double target = (low + high) / 2.0;
intptr_t new_capacity = (1 + table.NumOccupied()) / target;
Table new_table(New<Table>(new_capacity,
// Normally we double the size here, but if less than half are occupied
// then it won't grow (this would imply that there were quite a lot of
// deleted slots). We don't want to constantly rehash if we are adding
// and deleting entries at just under the load factor limit, so we may
// double the size even though the number of occupied slots would not
// necessarily justify it. For example if the max load factor is 71% and
// the table is 70% full we will double the size to avoid a rehash every
// time 1% has been added and deleted.
const intptr_t new_capacity = table.NumOccupied() * 2 + 1;
ASSERT(table.NumOccupied() == 0 ||
((1.0 + table.NumOccupied()) /
Utils::RoundUpToPowerOfTwo(new_capacity)) <= high);
Table new_table(New<Table>(new_capacity, // Is rounded up to power of 2.
table.data_->IsOld() ? Heap::kOld : Heap::kNew));
Copy(table, new_table);
*table.data_ = new_table.Release().raw();
@ -579,9 +607,8 @@ class HashMap : public BaseIterTable {
protected:
void EnsureCapacity() const {
static const double kMaxLoadFactor = 0.75;
// We currently never shrink.
HashTables::EnsureLoadFactor(0.0, kMaxLoadFactor, *this);
static const double kMaxLoadFactor = 0.71;
HashTables::EnsureLoadFactor(kMaxLoadFactor, *this);
}
};
@ -666,9 +693,8 @@ class HashSet : public BaseIterTable {
protected:
void EnsureCapacity() const {
static const double kMaxLoadFactor = 0.75;
// We currently never shrink.
HashTables::EnsureLoadFactor(0.0, kMaxLoadFactor, *this);
static const double kMaxLoadFactor = 0.71;
HashTables::EnsureLoadFactor(kMaxLoadFactor, *this);
}
};