From 959854769c721c161bf76192bb5371b8716b11a2 Mon Sep 17 00:00:00 2001 From: asiva Date: Tue, 20 Sep 2022 23:35:25 +0000 Subject: [PATCH] [VM/Library] - Adjust next probe to be a quadratic probe in _HashBase Adjust next probe to a quadratic probe so that it matches up with the internal VM hash map implementation. This avoids some issues with hash code clustering and uses the probing strategy in the VM's HashTable. Change-Id: I6af560b971c2e6fa745e3b3414e2d8a9075e7ff5 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/239004 Reviewed-by: Ryan Macnak Commit-Queue: Siva Annamalai --- sdk/lib/_internal/vm/lib/compact_hash.dart | 47 +++++++++++++++++----- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/sdk/lib/_internal/vm/lib/compact_hash.dart b/sdk/lib/_internal/vm/lib/compact_hash.dart index b5566c606c2..f3105ceb11c 100644 --- a/sdk/lib/_internal/vm/lib/compact_hash.dart +++ b/sdk/lib/_internal/vm/lib/compact_hash.dart @@ -180,14 +180,14 @@ mixin _HashBase on _HashAbstractBase { return (maskedHash == 0) ? (size >> 1) : maskedHash * (size >> 1); } - // Linear probing. + // Quadratic probing. static int _firstProbe(int fullHash, int sizeMask) { final int i = fullHash & sizeMask; // Light, fast shuffle to mitigate bad hashCode (e.g., sequential). return ((i << 1) + i) & sizeMask; } - static int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask; + static int _nextProbe(int i, int incr, int sizeMask) => (i + incr) & sizeMask; // A self-loop is used to mark a deleted key or value. static bool _isDeleted(List data, Object? keyOrValue) => @@ -334,6 +334,11 @@ mixin _ImmutableLinkedHashMapMixin } void _createIndex() { + // Because we use quadratic (actually triangle number) probing it is + // important that the size is a power of two (otherwise we could fail to + // find an empty slot). This is described in Knuth's The Art of Computer + // Programming Volume 3, Chapter 6.4, exercise 20 (solution in the + // appendix, 2nd edition).
final size = _roundUpToPowerOfTwo(max(_data.length, _HashBase._INITIAL_INDEX_SIZE)); final newIndex = new Uint32List(size); @@ -471,6 +476,7 @@ mixin _LinkedHashMapMixin on _HashBase, _EqualsAndHashCode { int i = _HashBase._firstProbe(fullHash, sizeMask); int firstDeleted = -1; int pair = index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { if (pair == _HashBase._DELETED_PAIR) { if (firstDeleted < 0) { @@ -485,7 +491,8 @@ mixin _LinkedHashMapMixin on _HashBase, _EqualsAndHashCode { } } } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = index[i]; } return firstDeleted >= 0 ? -firstDeleted : -i; @@ -539,6 +546,7 @@ mixin _LinkedHashMapMixin on _HashBase, _EqualsAndHashCode { final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); int i = _HashBase._firstProbe(fullHash, sizeMask); int pair = _index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { if (pair != _HashBase._DELETED_PAIR) { final int entry = hashPattern ^ pair; @@ -554,7 +562,8 @@ mixin _LinkedHashMapMixin on _HashBase, _EqualsAndHashCode { } } } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = _index[i]; } return null; @@ -569,6 +578,7 @@ mixin _LinkedHashMapMixin on _HashBase, _EqualsAndHashCode { final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); int i = _HashBase._firstProbe(fullHash, sizeMask); int pair = _index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { if (pair != _HashBase._DELETED_PAIR) { final int entry = hashPattern ^ pair; @@ -579,7 +589,8 @@ mixin _LinkedHashMapMixin on _HashBase, _EqualsAndHashCode { } } } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = _index[i]; } return _data; @@ -843,6 +854,7 @@ mixin _LinkedHashSetMixin on _HashBase, 
_EqualsAndHashCode { int i = _HashBase._firstProbe(fullHash, sizeMask); int firstDeleted = -1; int pair = _index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { if (pair == _HashBase._DELETED_PAIR) { if (firstDeleted < 0) { @@ -854,7 +866,8 @@ mixin _LinkedHashSetMixin on _HashBase, _EqualsAndHashCode { return false; } } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = _index[i]; } if (_usedData == _data.length) { @@ -879,6 +892,7 @@ mixin _LinkedHashSetMixin on _HashBase, _EqualsAndHashCode { final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); int i = _HashBase._firstProbe(fullHash, sizeMask); int pair = _index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { if (pair != _HashBase._DELETED_PAIR) { final int d = hashPattern ^ pair; @@ -886,7 +900,8 @@ mixin _LinkedHashSetMixin on _HashBase, _EqualsAndHashCode { return _data[d]; // Note: Must return the existing key. } } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = _index[i]; } return _data; @@ -907,6 +922,7 @@ mixin _LinkedHashSetMixin on _HashBase, _EqualsAndHashCode { final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); int i = _HashBase._firstProbe(fullHash, sizeMask); int pair = _index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { if (pair != _HashBase._DELETED_PAIR) { final int d = hashPattern ^ pair; @@ -917,7 +933,8 @@ mixin _LinkedHashSetMixin on _HashBase, _EqualsAndHashCode { return true; } } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = _index[i]; } return false; @@ -928,6 +945,11 @@ mixin _LinkedHashSetMixin on _HashBase, _EqualsAndHashCode { // This method is called by [_rehashObjects] (see above). 
void _regenerateIndex() { + // Because we use quadratic (actually triangle number) probing it is + // important that the size is a power of two (otherwise we could fail to + // find an empty slot). This is described in Knuth's The Art of Computer + // Programming Volume 3, Chapter 6.4, exercise 20 (solution in the + // appendix, 2nd edition). final size = _roundUpToPowerOfTwo(max(_data.length, _HashBase._INITIAL_INDEX_SIZE)); _index = _data.length == 0 ? _uninitializedIndex : new Uint32List(size); @@ -1013,6 +1035,11 @@ mixin _ImmutableLinkedHashSetMixin } void _createIndex() { + // Because we use quadratic (actually triangle number) probing it is + // important that the size is a power of two (otherwise we could fail to + // find an empty slot). This is described in Knuth's The Art of Computer + // Programming Volume 3, Chapter 6.4, exercise 20 (solution in the + // appendix, 2nd edition). final size = _roundUpToPowerOfTwo( max(_data.length * 2, _HashBase._INITIAL_INDEX_SIZE)); final index = new Uint32List(size); @@ -1030,6 +1057,7 @@ mixin _ImmutableLinkedHashSetMixin int i = _HashBase._firstProbe(fullHash, sizeMask); int pair = index[i]; + int probeDistance = 1; while (pair != _HashBase._UNUSED_PAIR) { assert(pair != _HashBase._DELETED_PAIR); @@ -1039,7 +1067,8 @@ mixin _ImmutableLinkedHashSetMixin assert(!_equals(key, _data[d])); } - i = _HashBase._nextProbe(i, sizeMask); + i = _HashBase._nextProbe(i, probeDistance, sizeMask); + probeDistance += 1; pair = index[i]; }