[VM/Library] - Adjust next probe to be a quadratic probe in _HashBase

Adjust next probe to a quadratic probe so that it matches up with the internal VM hash map implementation. This avoids some issues with hash code clustering and uses the same probing strategy as the VM's HashTable.

Change-Id: I6af560b971c2e6fa745e3b3414e2d8a9075e7ff5
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/239004
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Siva Annamalai <asiva@google.com>
This commit is contained in:
asiva 2022-09-20 23:35:25 +00:00 committed by Commit Bot
parent cf0efb4654
commit 959854769c

View file

@ -180,14 +180,14 @@ mixin _HashBase on _HashAbstractBase {
return (maskedHash == 0) ? (size >> 1) : maskedHash * (size >> 1);
}
// Linear probing.
// Quadratic probing.
// Returns the first index slot to probe for [fullHash]: the hash is masked
// with [sizeMask], multiplied by three, and masked with [sizeMask] again.
// NOTE(review): presumably sizeMask is (index size - 1) with the index size a
// power of two -- confirm against the callers.
static int _firstProbe(int fullHash, int sizeMask) {
final int i = fullHash & sizeMask;
// Light, fast shuffle to mitigate bad hashCode (e.g., sequential).
// (i << 1) + i is i * 3; the final mask keeps the result within sizeMask.
return ((i << 1) + i) & sizeMask;
}
static int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask;
// Returns the slot that follows [i] in the probe sequence: [i] advanced by
// [incr] and masked with [sizeMask]. The lookup loops pass an [incr] that
// grows by one on every step, so the cumulative offsets from the first probe
// are the triangular numbers (1, 3, 6, ...).
static int _nextProbe(int i, int incr, int sizeMask) => (i + incr) & sizeMask;
// A self-loop is used to mark a deleted key or value.
static bool _isDeleted(List<Object?> data, Object? keyOrValue) =>
@ -334,6 +334,11 @@ mixin _ImmutableLinkedHashMapMixin<K, V>
}
void _createIndex() {
// Because we use quadratic (actually triangle number) probing it is
// important that the size is a power of two (otherwise we could fail to
// find an empty slot). This is described in Knuth's The Art of Computer
// Programming Volume 3, Chapter 6.4, exercise 20 (solution in the
// appendix, 2nd edition).
final size =
_roundUpToPowerOfTwo(max(_data.length, _HashBase._INITIAL_INDEX_SIZE));
final newIndex = new Uint32List(size);
@ -471,6 +476,7 @@ mixin _LinkedHashMapMixin<K, V> on _HashBase, _EqualsAndHashCode {
int i = _HashBase._firstProbe(fullHash, sizeMask);
int firstDeleted = -1;
int pair = index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
if (pair == _HashBase._DELETED_PAIR) {
if (firstDeleted < 0) {
@ -485,7 +491,8 @@ mixin _LinkedHashMapMixin<K, V> on _HashBase, _EqualsAndHashCode {
}
}
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = index[i];
}
return firstDeleted >= 0 ? -firstDeleted : -i;
@ -539,6 +546,7 @@ mixin _LinkedHashMapMixin<K, V> on _HashBase, _EqualsAndHashCode {
final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
int i = _HashBase._firstProbe(fullHash, sizeMask);
int pair = _index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
if (pair != _HashBase._DELETED_PAIR) {
final int entry = hashPattern ^ pair;
@ -554,7 +562,8 @@ mixin _LinkedHashMapMixin<K, V> on _HashBase, _EqualsAndHashCode {
}
}
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = _index[i];
}
return null;
@ -569,6 +578,7 @@ mixin _LinkedHashMapMixin<K, V> on _HashBase, _EqualsAndHashCode {
final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
int i = _HashBase._firstProbe(fullHash, sizeMask);
int pair = _index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
if (pair != _HashBase._DELETED_PAIR) {
final int entry = hashPattern ^ pair;
@ -579,7 +589,8 @@ mixin _LinkedHashMapMixin<K, V> on _HashBase, _EqualsAndHashCode {
}
}
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = _index[i];
}
return _data;
@ -843,6 +854,7 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
int i = _HashBase._firstProbe(fullHash, sizeMask);
int firstDeleted = -1;
int pair = _index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
if (pair == _HashBase._DELETED_PAIR) {
if (firstDeleted < 0) {
@ -854,7 +866,8 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
return false;
}
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = _index[i];
}
if (_usedData == _data.length) {
@ -879,6 +892,7 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
int i = _HashBase._firstProbe(fullHash, sizeMask);
int pair = _index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
if (pair != _HashBase._DELETED_PAIR) {
final int d = hashPattern ^ pair;
@ -886,7 +900,8 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
return _data[d]; // Note: Must return the existing key.
}
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = _index[i];
}
return _data;
@ -907,6 +922,7 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
int i = _HashBase._firstProbe(fullHash, sizeMask);
int pair = _index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
if (pair != _HashBase._DELETED_PAIR) {
final int d = hashPattern ^ pair;
@ -917,7 +933,8 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
return true;
}
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = _index[i];
}
return false;
@ -928,6 +945,11 @@ mixin _LinkedHashSetMixin<E> on _HashBase, _EqualsAndHashCode {
// This method is called by [_rehashObjects] (see above).
void _regenerateIndex() {
// Because we use quadratic (actually triangle number) probing it is
// important that the size is a power of two (otherwise we could fail to
// find an empty slot). This is described in Knuth's The Art of Computer
// Programming Volume 3, Chapter 6.4, exercise 20 (solution in the
// appendix, 2nd edition).
final size =
_roundUpToPowerOfTwo(max(_data.length, _HashBase._INITIAL_INDEX_SIZE));
_index = _data.length == 0 ? _uninitializedIndex : new Uint32List(size);
@ -1013,6 +1035,11 @@ mixin _ImmutableLinkedHashSetMixin<E>
}
void _createIndex() {
// Because we use quadratic (actually triangle number) probing it is
// important that the size is a power of two (otherwise we could fail to
// find an empty slot). This is described in Knuth's The Art of Computer
// Programming Volume 3, Chapter 6.4, exercise 20 (solution in the
// appendix, 2nd edition).
final size = _roundUpToPowerOfTwo(
max(_data.length * 2, _HashBase._INITIAL_INDEX_SIZE));
final index = new Uint32List(size);
@ -1030,6 +1057,7 @@ mixin _ImmutableLinkedHashSetMixin<E>
int i = _HashBase._firstProbe(fullHash, sizeMask);
int pair = index[i];
int probeDistance = 1;
while (pair != _HashBase._UNUSED_PAIR) {
assert(pair != _HashBase._DELETED_PAIR);
@ -1039,7 +1067,8 @@ mixin _ImmutableLinkedHashSetMixin<E>
assert(!_equals(key, _data[d]));
}
i = _HashBase._nextProbe(i, sizeMask);
i = _HashBase._nextProbe(i, probeDistance, sizeMask);
probeDistance += 1;
pair = index[i];
}