mirror of
https://github.com/dart-lang/sdk
synced 2024-11-02 14:32:24 +00:00
c93f924c82
When setRange is called on a TypedData receiver and the source is also a TypedData object with the same element size and clamping is not required, the VM implementation now calls _boundsCheckAndMemcpyN for element size N. The generated IL for these methods performs the copy using the MemoryCopy instruction (mostly, see the note below). Since the two TypedData objects might have the same underlying buffer, the CL adds a can_overlap flag to the MemoryCopy instruction which checks for overlapping regions. If can_overlap is set, then the copy is performed backwards instead of forwards when needed to ensure that elements of the source region are read before they are overwritten. The existing uses of the MemoryCopy instruction are adjusted as follows: * The IL generated for copyRangeFromUint8ListToOneByteString passes false for can_overlap, as all uses currently ensure that the OneByteString is non-external and thus cannot overlap. * The IL generated for _memCopy, used by the FFI library, passes true for can_overlap, as there is no guarantee that the regions pointed at by the Pointer objects do not overlap. The MemoryCopy instruction has also been adjusted so that all numeric inputs (the two start offsets and the length) are either boxed or unboxed instead of just the length. This exposed an issue in the inliner, where unboxed constants in the callee graph were replaced with boxed constants when inlining into the caller graph, since withList calls setRange with constant starting offsets of 0. Now the representation of constants in the callee graph are preserved when inlining the callee graph into the caller graph. Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP and TMP2 for the LDP/STP calls in the 16-byte element size case, so no temporaries need to be allocated for the instruction. 
On ARM, when not unrolling the memory copy loop, the code uses TMP and a single additional temporary for LDM/STM calls in the 8-byte and 16-byte element cases, with the latter just using two LDM/STM calls within the loop, a different approach than the one described in https://github.com/dart-lang/sdk/issues/51229. Note: Once the number of elements being copied reaches a certain threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN instead calls _nativeSetRange, which is a native call that uses memmove from the standard C library for non-clamped inputs. It does this because the code currently emitted for MemoryCopy performs poorly compared to the more optimized memmove implementation when copying larger regions of memory. Notable benchmark changes for dart-aot: * X64 * TypedDataDuplicate.*.fromList improvement from ~13%-~250% * Utf8Encode.*.10 improvement from ~50%-~75% * MapCopy.Map.*.of.Map.* improvement from ~13%-~65% * MemoryCopy.*.setRange.* improvement from ~13%-~500% * ARM7 * Utf8Encode.*.10 improvement from ~35%-~70% * MapCopy.Map.*.of.Map.* improvement from ~6%-~75% * MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500% * Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double * Regression of ~40% for MemoryCopy.512.setRange.*.Uint8 * Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8 * ARM8 * Utf8Encode.*.10 improvement from ~35%-~70% * MapCopy.Map.*.of.Map.* improvement from ~7%-~75% * MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500% * Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double * Regression of ~40% for MemoryCopy.512.setRange.*.Uint8 * Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8 TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data, lib{,_2}/typed_data, corelib{,_2}/list_test Issue: https://github.com/dart-lang/sdk/issues/42072 Issue: b/294114694 Issue: b/259315681 Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9 Cq-Include-Trybots: 
luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521 Reviewed-by: Daco Harkes <dacoharkes@google.com> Reviewed-by: Alexander Markov <alexmarkov@google.com> Commit-Queue: Tess Strickland <sstrickl@google.com>
148 lines
7.1 KiB
C++
148 lines
7.1 KiB
C++
// Copyright (c) 2017, the Dart project authors.  Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
|
|
|
|
#ifndef RUNTIME_VM_CONSTANTS_X86_H_
|
|
#define RUNTIME_VM_CONSTANTS_X86_H_
|
|
|
|
#include "platform/assert.h"
|
|
|
|
namespace dart {
|
|
|
|
// Condition codes shared by the x86 back ends.
//
// The primary codes (0..15) are numbered in complementary pairs: a condition
// and its negation differ only in bit 0, which InvertCondition() relies on.
// NOTE(review): the numbering appears to match the hardware condition-code
// field used by Jcc/SETcc/CMOVcc -- confirm before renumbering anything.
enum Condition {
  // Primary codes, listed as (condition, negated condition) pairs.
  OVERFLOW = 0,
  NO_OVERFLOW = 1,
  BELOW = 2,
  ABOVE_EQUAL = 3,
  EQUAL = 4,
  NOT_EQUAL = 5,
  BELOW_EQUAL = 6,
  ABOVE = 7,
  SIGN = 8,
  NOT_SIGN = 9,
  PARITY_EVEN = 10,
  PARITY_ODD = 11,
  LESS = 12,
  GREATER_EQUAL = 13,
  LESS_EQUAL = 14,
  GREATER = 15,

  // Aliases that name the same codes by the flag they test.
  ZERO = EQUAL,
  NOT_ZERO = NOT_EQUAL,
  NEGATIVE = SIGN,
  POSITIVE = NOT_SIGN,
  CARRY = BELOW,
  NOT_CARRY = ABOVE_EQUAL,

  // Platform-independent variants declared for all platforms
  // EQUAL,
  // NOT_EQUAL,
  // LESS,
  // LESS_EQUAL,
  // GREATER_EQUAL,
  // GREATER,
  UNSIGNED_LESS = BELOW,
  UNSIGNED_LESS_EQUAL = BELOW_EQUAL,
  UNSIGNED_GREATER = ABOVE,
  UNSIGNED_GREATER_EQUAL = ABOVE_EQUAL,

  // Sentinel one past the last primary code; not a usable condition.
  kInvalidCondition = 16
};
|
|
|
|
// Returns the negation of condition |c| (for example EQUAL <-> NOT_EQUAL).
//
// Each primary condition and its negation are numbered so that they differ
// only in the lowest bit. The compile-time checks below pin that layout for
// every pair, which is what allows the inversion to be a single XOR.
// |c| must not be kInvalidCondition.
static inline Condition InvertCondition(Condition c) {
  COMPILE_ASSERT((OVERFLOW ^ NO_OVERFLOW) == 1);
  COMPILE_ASSERT((BELOW ^ ABOVE_EQUAL) == 1);
  COMPILE_ASSERT((EQUAL ^ NOT_EQUAL) == 1);
  COMPILE_ASSERT((BELOW_EQUAL ^ ABOVE) == 1);
  COMPILE_ASSERT((SIGN ^ NOT_SIGN) == 1);
  COMPILE_ASSERT((PARITY_EVEN ^ PARITY_ODD) == 1);
  COMPILE_ASSERT((LESS ^ GREATER_EQUAL) == 1);
  COMPILE_ASSERT((LESS_EQUAL ^ GREATER) == 1);
  ASSERT(c != kInvalidCondition);
  // Flipping bit 0 moves to the other member of the pair.
  return static_cast<Condition>(c ^ 1);
}
|
|
|
|
// X-macro listing instructions that take no operands and are encoded as a
// single opcode byte. Invokes F(mnemonic, opcode_byte) once per instruction.
// For `movs` and `cmps` the list holds only the base name; per the inline
// notes, the operand-size suffix is attached by the code expanding the list.
#define X86_ZERO_OPERAND_1_BYTE_INSTRUCTIONS(F)                                \
  F(ret, 0xC3)                                                                 \
  F(leave, 0xC9)                                                               \
  F(hlt, 0xF4)                                                                 \
  F(cld, 0xFC)                                                                 \
  F(std, 0xFD)                                                                 \
  F(int3, 0xCC)                                                                \
  F(pushad, 0x60)                                                              \
  F(popad, 0x61)                                                               \
  F(pushfd, 0x9C)                                                              \
  F(popfd, 0x9D)                                                               \
  F(sahf, 0x9E)                                                                \
  F(cdq, 0x99)                                                                 \
  F(fwait, 0x9B)                                                               \
  F(movsb, 0xA4)                                                               \
  F(movs, 0xA5) /* Size suffix added in code */                                \
  F(cmpsb, 0xA6)                                                               \
  F(cmps, 0xA7) /* Size suffix added in code */
|
|
|
|
// clang-format off
|
|
// X-macro listing the integer ALU operations. Invokes F(mnemonic, code) once
// per operation, with codes covering 0..7 exactly once.
// NOTE(review): the codes look like the 3-bit opcode-extension (/digit)
// values of the x86 "group 1" ALU instructions -- confirm against the
// assembler that consumes this list.
#define X86_ALU_CODES(F)                                                       \
  F(and, 4)                                                                    \
  F(or, 1)                                                                     \
  F(xor, 6)                                                                    \
  F(add, 0)                                                                    \
  F(adc, 2)                                                                    \
  F(sub, 5)                                                                    \
  F(sbb, 3)                                                                    \
  F(cmp, 7)
|
|
|
|
// X-macro listing the XMM (SSE) arithmetic/logic operations. Invokes
// F(mnemonic, code) once per slot, with codes running 0x0..0xF in order.
// Slots named badN are placeholders that keep the numbering dense.
// NOTE(review): the codes look like the low nibble of the two-byte 0F 5x SSE
// opcodes, with badN marking slots that are not plain ALU ops -- confirm
// against the assembler that consumes this list.
#define XMM_ALU_CODES(F)                                                       \
  F(bad0, 0)                                                                   \
  F(sqrt, 1)                                                                   \
  F(rsqrt, 2)                                                                  \
  F(rcp, 3)                                                                    \
  F(and, 4)                                                                    \
  F(bad1, 5)                                                                   \
  F(or, 6)                                                                     \
  F(xor, 7)                                                                    \
  F(add, 8)                                                                    \
  F(mul, 9)                                                                    \
  F(bad2, 0xA)                                                                 \
  F(bad3, 0xB)                                                                 \
  F(sub, 0xC)                                                                  \
  F(min, 0xD)                                                                  \
  F(div, 0xE)                                                                  \
  F(max, 0xF)
|
|
// clang-format on
|
|
|
|
// X-macro listing the XMM comparison predicates. Invokes F(mnemonic, code)
// once per predicate, with codes running 0..7 in order.
//
// Table 3-1, first part
// NOTE(review): presumably the comparison-predicate table for CMPPS/CMPPD in
// the Intel instruction set reference -- confirm which manual revision the
// table number refers to.
#define XMM_CONDITIONAL_CODES(F)                                               \
  F(eq, 0)                                                                     \
  F(lt, 1)                                                                     \
  F(le, 2)                                                                     \
  F(unord, 3)                                                                  \
  F(neq, 4)                                                                    \
  F(nlt, 5)                                                                    \
  F(nle, 6)                                                                    \
  F(ord, 7)
|
|
|
|
// X-macro pairing instruction-mnemonic suffixes with Condition enumerators.
// Invokes F(suffix, condition) once per suffix. The first sixteen entries
// cover each primary condition code once; the trailing entries are extra
// spellings for codes already listed (EQUAL and NOT_EQUAL are the same codes
// as ZERO and NOT_ZERO), so F sees those two codes twice.
#define X86_CONDITIONAL_SUFFIXES(F)                                            \
  F(o, OVERFLOW)                                                               \
  F(no, NO_OVERFLOW)                                                           \
  F(c, CARRY)                                                                  \
  F(nc, NOT_CARRY)                                                             \
  F(z, ZERO)                                                                   \
  F(nz, NOT_ZERO)                                                              \
  F(na, BELOW_EQUAL)                                                           \
  F(a, ABOVE)                                                                  \
  F(s, SIGN)                                                                   \
  F(ns, NOT_SIGN)                                                              \
  F(pe, PARITY_EVEN)                                                           \
  F(po, PARITY_ODD)                                                            \
  F(l, LESS)                                                                   \
  F(ge, GREATER_EQUAL)                                                         \
  F(le, LESS_EQUAL)                                                            \
  F(g, GREATER)                                                                \
  /* Some alternative names */                                                 \
  F(e, EQUAL)                                                                  \
  F(ne, NOT_EQUAL)
|
|
|
|
} // namespace dart
|
|
|
|
#endif // RUNTIME_VM_CONSTANTS_X86_H_
|