2013-03-07 21:50:33 +00:00
|
|
|
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
|
|
|
// for details. All rights reserved. Use of this source code is governed by a
|
|
|
|
// BSD-style license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
#include "vm/bootstrap_natives.h"
|
|
|
|
|
|
|
|
#include "include/dart_api.h"
|
|
|
|
|
|
|
|
#include "vm/exceptions.h"
|
|
|
|
#include "vm/native_entry.h"
|
|
|
|
#include "vm/object.h"
|
2019-09-30 20:19:03 +00:00
|
|
|
#include "vm/object_store.h"
|
2013-03-07 21:50:33 +00:00
|
|
|
|
|
|
|
namespace dart {
|
|
|
|
|
|
|
|
// TypedData.
|
|
|
|
|
2021-06-08 19:15:38 +00:00
|
|
|
// Native entry that reports the length of the typed data receiver.
//
// Native argument 0 is the receiver and must be a non-null TypedDataBase.
// The value of its Length() is returned wrapped in a Smi.
DEFINE_NATIVE_ENTRY(TypedDataBase_length, 0, 1) {
  GET_NON_NULL_NATIVE_ARGUMENT(TypedDataBase, typed_data,
                               arguments->NativeArgAt(0));
  const auto length = typed_data.Length();
  return Smi::New(length);
}
|
|
|
|
|
2019-03-15 21:24:15 +00:00
|
|
|
// Native entry that returns the offset_in_bytes() value stored in a typed
// data view.
//
// Native argument 0 is the receiver: an instance of one of the _*ArrayView
// classes or of _ByteDataView. It must be non-null.
DEFINE_NATIVE_ENTRY(TypedDataView_offsetInBytes, 0, 1) {
  GET_NON_NULL_NATIVE_ARGUMENT(Instance, receiver, arguments->NativeArgAt(0));
  // The receiver is only fetched as a generic Instance above, so verify that
  // it really is a view before casting.
  ASSERT(receiver.IsTypedDataView());
  const auto& view = TypedDataView::Cast(receiver);
  return view.offset_in_bytes();
}
|
|
|
|
|
|
|
|
// Native entry that returns the typed_data() value stored in a typed data
// view.
//
// Native argument 0 is the receiver: an instance of one of the _*ArrayView
// classes or of _ByteDataView. It must be non-null.
DEFINE_NATIVE_ENTRY(TypedDataView_typedData, 0, 1) {
  GET_NON_NULL_NATIVE_ARGUMENT(Instance, receiver, arguments->NativeArgAt(0));
  // The receiver is only fetched as a generic Instance above, so verify that
  // it really is a view before casting.
  ASSERT(receiver.IsTypedDataView());
  const auto& view = TypedDataView::Cast(receiver);
  return view.typed_data();
}
|
|
|
|
|
[vm/compiler] Limit exposure of untagged pointers to managed memory.
After https://dart-review.googlesource.com/c/sdk/+/330600, there were
more chances for the optimizing compiler to introduce or move
GC-triggering instructions like allocations or boxings between the
retrieval of an untagged pointer to GC-moveable memory and its use.
To limit the chance of this happening, this CL removes the explicit
loading of the untagged payload address when building the initial
flow graph in most cases when the array is not known to be an external
array (an external string, an external typed data object, or an FFI
Pointer).
The remaining case is during view allocation, which extracts the
payload address of the base typed data object underlying the view
(which may be GC-movable) to calculate the payload address that should
be stored in the data field of the view object. See
https://github.com/dart-lang/sdk/issues/54884.
During canonicalization of LoadIndexed, StoreIndexed, and MemoryCopy
instructions, if the cid of an array input is an external array
(external string, external typed data object, or Pointer), then a
LoadField instruction that extracts the untagged payload address
is inserted before the instruction and the corresponding input is
rebound to that LoadField instruction.
Once all compiler passes that involve code motion have been performed,
a new pass looks for LoadIndexed, StoreIndexed, or MemoryCopy where
the cid stored in the instruction for the array is a typed data cid.
In these cases, if the array is not an internal typed data object,
then the payload address is extracted. Waiting until this point ensures
that no GC-triggering instructions are inserted between the extraction
of the payload address and the use. (Internal typed data objects are
left as-is because the payload address is inside the object itself
and doesn't require indirection through the data field of the object).
This CL also replaces code conditional on the array cid with code
that is instead conditional on the array element representation in
cases where it makes sense to do so, since this is a less brittle
check than checking the array cid (e.g., checking for kUnboxedInt8
to load, store, or copy a signed byte from an array instead of
listing all possible array cids that store signed bytes).
This CL also fixes an issue with the ARM64 assembler where calling
LoadFromOffset with an Address that has a non-Offset type would
silently generate bad code instead of triggering the ASSERT in
PrepareLargeOffset.
TEST=vm/dart/typed_list_index_checkbound_il_test
Issue: https://github.com/dart-lang/sdk/issues/54710
Cq-Include-Trybots: luci.dart.try:vm-aot-android-release-arm64c-try,vm-aot-android-release-arm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-aot-obfuscate-linux-release-x64-try,vm-aot-optimization-level-linux-release-x64-try,vm-aot-win-debug-arm64-try,vm-appjit-linux-debug-x64-try,vm-asan-linux-release-x64-try,vm-checked-mac-release-arm64-try,vm-eager-optimization-linux-release-ia32-try,vm-eager-optimization-linux-release-x64-try,vm-ffi-android-debug-arm-try,vm-ffi-android-debug-arm64c-try,vm-ffi-qemu-linux-release-arm-try,vm-ffi-qemu-linux-release-riscv64-try,vm-fuchsia-release-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-msan-linux-release-x64-try,vm-reload-linux-debug-x64-try,vm-reload-rollback-linux-debug-x64-try,vm-ubsan-linux-release-x64-try,vm-win-debug-arm64-try,vm-win-debug-x64-try,vm-win-release-ia32-try
Change-Id: I25b5f314943e9254d3d28986d720a5d47f12feeb
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/352363
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2024-03-22 10:12:39 +00:00
|
|
|
// Returns true if [cid] is a typed data class id whose corresponding
// "internal" class id is either kTypedDataUint8ArrayCid or
// kTypedDataUint8ClampedArrayCid, and false otherwise (including when [cid]
// is not a typed data class id at all).
static bool IsTypedDataUint8ArrayClassId(intptr_t cid) {
  if (IsTypedDataBaseClassId(cid)) {
    // Position of [cid] within its group of kNumTypedDataCidRemainders
    // consecutive typed data class ids.
    const intptr_t remainder =
        (cid - kFirstTypedDataCid) % kNumTypedDataCidRemainders;
    // Rebase onto the start of the group, then select the internal variant.
    const intptr_t normalized_cid =
        cid - remainder + kTypedDataCidRemainderInternal;
    const bool is_uint8 = normalized_cid == kTypedDataUint8ArrayCid;
    return is_uint8 || normalized_cid == kTypedDataUint8ClampedArrayCid;
  }
  return false;
}
|
|
|
|
|
[vm/compiler] Create leaf runtime entry for memmove.
Instead of making a StaticCall to _TypedListBase.nativeSetRange
inside _memMoveN, make a CCall to the memmove leaf runtime entry.
Rename _TypedListBase._nativeSetRange to _setClampedRange, since
it's now only used when per-element clamping is necessary.
Fix the load optimizer so that loads of unboxed fields from freshly
allocated objects do not have the tagged null value forwarded
as their initial post-allocation value.
TEST=co19{,_2}/LibTest/typed_data lib{,_2}/typed_data
corelib{,_2}/list_test
vm/cc/LoadOptimizer_LoadDataFieldOfNewTypedData
Issue: https://github.com/dart-lang/sdk/issues/42072
Change-Id: Ib82e24a5b3287fa53099fffd3b563a27d777507e
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324080
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-11 21:25:09 +00:00
|
|
|
DEFINE_NATIVE_ENTRY(TypedDataBase_setClampedRange, 0, 5) {
|
[vm/compiler] Move setRange bounds checking entirely into Dart.
The bounds checking was implemented in Dart previously, but this
removes _checkSetRangeArguments, inlining it into
_TypedListBase.setRange, renames _checkBoundsAndMemcpyN to _memMoveN
since it no longer performs bounds checking, and also removes the now
unneeded bounds checking from the native function TypedData_setRange.
TEST=co19{,_2}/LibTest/typed_data lib{,_2}/typed_data
corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
Change-Id: I85ec751708f603f68729f4109d7339dd8407ae77
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324102
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-05 17:10:51 +00:00
|
|
|
// This is called after bounds checking, so the numeric inputs are
|
|
|
|
// guaranteed to be Smis, and the length is guaranteed to be non-zero.
|
2021-06-08 19:15:38 +00:00
|
|
|
const TypedDataBase& dst =
|
|
|
|
TypedDataBase::CheckedHandle(zone, arguments->NativeArgAt(0));
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph are preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Uf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Uf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Uf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
const Smi& dst_start_smi =
|
|
|
|
Smi::CheckedHandle(zone, arguments->NativeArgAt(1));
|
[vm/compiler] Move setRange bounds checking entirely into Dart.
The bounds checking was implemented in Dart previously, but this
removes _checkSetRangeArguments, inlining it into
_TypedListBase.setRange, renames _checkBoundsAndMemcpyN to _memMoveN
since it no longer performs bounds checking, and also removes the now
unneeded bounds checking from the native function TypedData_setRange.
TEST=co19{,_2}/LibTest/typed_data lib{,_2}/typed_data
corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
Change-Id: I85ec751708f603f68729f4109d7339dd8407ae77
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324102
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-05 17:10:51 +00:00
|
|
|
const Smi& length_smi = Smi::CheckedHandle(zone, arguments->NativeArgAt(2));
|
2021-06-08 19:15:38 +00:00
|
|
|
const TypedDataBase& src =
|
|
|
|
TypedDataBase::CheckedHandle(zone, arguments->NativeArgAt(3));
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph are preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Uf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Uf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Uf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
const Smi& src_start_smi =
|
|
|
|
Smi::CheckedHandle(zone, arguments->NativeArgAt(4));
|
|
|
|
|
|
|
|
const intptr_t element_size_in_bytes = dst.ElementSizeInBytes();
|
|
|
|
ASSERT_EQUAL(src.ElementSizeInBytes(), element_size_in_bytes);
|
|
|
|
|
|
|
|
const intptr_t dst_start_in_bytes =
|
|
|
|
dst_start_smi.Value() * element_size_in_bytes;
|
[vm/compiler] Move setRange bounds checking entirely into Dart.
The bounds checking was implemented in Dart previously, but this
removes _checkSetRangeArguments, inlining it into
_TypedListBase.setRange, renames _checkBoundsAndMemcpyN to _memMoveN
since it no longer performs bounds checking, and also removes the now
unneeded bounds checking from the native function TypedData_setRange.
TEST=co19{,_2}/LibTest/typed_data lib{,_2}/typed_data
corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
Change-Id: I85ec751708f603f68729f4109d7339dd8407ae77
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324102
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-05 17:10:51 +00:00
|
|
|
const intptr_t length_in_bytes = length_smi.Value() * element_size_in_bytes;
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph are preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Uf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Uf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Uf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
const intptr_t src_start_in_bytes =
|
|
|
|
src_start_smi.Value() * element_size_in_bytes;
|
|
|
|
|
[vm/compiler] Move setRange bounds checking entirely into Dart.
The bounds checking was implemented in Dart previously, but this
removes _checkSetRangeArguments, inlining it into
_TypedListBase.setRange, renames _checkBoundsAndMemcpyN to _memMoveN
since it no longer performs bounds checking, and also removes the now
unneeded bounds checking from the native function TypedData_setRange.
TEST=co19{,_2}/LibTest/typed_data lib{,_2}/typed_data
corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
Change-Id: I85ec751708f603f68729f4109d7339dd8407ae77
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324102
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-05 17:10:51 +00:00
|
|
|
#if defined(DEBUG)
|
|
|
|
// Verify bounds checks weren't needed.
|
|
|
|
ASSERT(dst_start_in_bytes >= 0);
|
|
|
|
ASSERT(src_start_in_bytes >= 0);
|
|
|
|
// The callers of this native function never call it for a zero-sized copy.
|
|
|
|
ASSERT(length_in_bytes > 0);
|
|
|
|
|
|
|
|
const intptr_t dst_length_in_bytes = dst.LengthInBytes();
|
|
|
|
// Since the length is non-zero, the start can't be the same as the end.
|
|
|
|
ASSERT(dst_start_in_bytes < dst_length_in_bytes);
|
|
|
|
ASSERT(length_in_bytes <= dst_length_in_bytes - dst_start_in_bytes);
|
|
|
|
|
|
|
|
const intptr_t src_length_in_bytes = src.LengthInBytes();
|
|
|
|
// Since the length is non-zero, the start can't be the same as the end.
|
|
|
|
ASSERT(src_start_in_bytes < src_length_in_bytes);
|
|
|
|
ASSERT(length_in_bytes <= src_length_in_bytes - src_start_in_bytes);
|
|
|
|
#endif
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph are preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Utf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
|
[vm/compiler] Limit exposure of untagged pointers to managed memory.
After https://dart-review.googlesource.com/c/sdk/+/330600, there were
more chances for the optimizing compiler to introduce or move
GC-triggering instructions like allocations or boxings between the
retrieval of an untagged pointer to GC-moveable memory and its use.
To limit the chance of this happening, this CL removes the explicit
loading of the untagged payload address when building the initial
flow graph in most cases when the array is not known to be an external
array (an external string, an external typed data object, or an FFI
Pointer).
The remaining case is during view allocation, which extracts the
payload address of the base typed data object underlying the view
(which may be GC-movable) to calculate the payload address that should
be stored in the data field of the view object. See
https://github.com/dart-lang/sdk/issues/54884.
During canonicalization of LoadIndexed, StoreIndexed, and MemoryCopy
instructions, if the cid of an array input is an external array
(external string, external typed data object, or Pointer), then a
LoadField instruction that extracts the untagged payload address
is inserted before the instruction and the corresponding input is
rebound to that LoadField instruction.
Once all compiler passes that involve code motion have been performed,
a new pass looks for LoadIndexed, StoreIndexed, or MemoryCopy where
the cid stored in the instruction for the array is a typed data cid.
In these cases, if the array is not an internal typed data object,
then the payload address is extracted. Waiting until this point ensures
that no GC-triggering instructions are inserted between the extraction
of the payload address and the use. (Internal typed data objects are
left as-is because the payload address is inside the object itself
and doesn't require indirection through the data field of the object).
This CL also replaces code conditional on the array cid with code
that is instead conditional on the array element representation in
cases where it makes sense to do so, since this is a less brittle
check than checking the array cid (e.g., checking for kUnboxedInt8
to load, store, or copy a signed byte from an array instead of
listing all possible array cids that store signed bytes).
This CL also fixes an issue with the ARM64 assembler where calling
LoadFromOffset with an Address that has a non-Offset type would
silently generate bad code instead of triggering the ASSERT in
PrepareLargeOffset.
TEST=vm/dart/typed_list_index_checkbound_il_test
Issue: https://github.com/dart-lang/sdk/issues/54710
Cq-Include-Trybots: luci.dart.try:vm-aot-android-release-arm64c-try,vm-aot-android-release-arm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-aot-obfuscate-linux-release-x64-try,vm-aot-optimization-level-linux-release-x64-try,vm-aot-win-debug-arm64-try,vm-appjit-linux-debug-x64-try,vm-asan-linux-release-x64-try,vm-checked-mac-release-arm64-try,vm-eager-optimization-linux-release-ia32-try,vm-eager-optimization-linux-release-x64-try,vm-ffi-android-debug-arm-try,vm-ffi-android-debug-arm64c-try,vm-ffi-qemu-linux-release-arm-try,vm-ffi-qemu-linux-release-riscv64-try,vm-fuchsia-release-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-msan-linux-release-x64-try,vm-reload-linux-debug-x64-try,vm-reload-rollback-linux-debug-x64-try,vm-ubsan-linux-release-x64-try,vm-win-debug-arm64-try,vm-win-debug-x64-try,vm-win-release-ia32-try
Change-Id: I25b5f314943e9254d3d28986d720a5d47f12feeb
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/352363
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2024-03-22 10:12:39 +00:00
|
|
|
ASSERT(IsClampedTypedDataBaseClassId(dst.ptr()->GetClassId()));
|
|
|
|
// The algorithm below assumes the clamped destination has uint8 elements.
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph is preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Utf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
ASSERT_EQUAL(element_size_in_bytes, 1);
|
[vm/compiler] Limit exposure of untagged pointers to managed memory.
After https://dart-review.googlesource.com/c/sdk/+/330600, there were
more chances for the optimizing compiler to introduce or move
GC-triggering instructions like allocations or boxings between the
retrieval of an untagged pointer to GC-moveable memory and its use.
To limit the chance of this happening, this CL removes the explicit
loading of the untagged payload address when building the initial
flow graph in most cases when the array is not known to be an external
array (an external string, an external typed data object, or an FFI
Pointer).
The remaining case is during view allocation, which extracts the
payload address of the base typed data object underlying the view
(which may be GC-movable) to calculate the payload address that should
be stored in the data field of the view object. See
https://github.com/dart-lang/sdk/issues/54884.
During canonicalization of LoadIndexed, StoreIndexed, and MemoryCopy
instructions, if the cid of an array input is an external array
(external string, external typed data object, or Pointer), then a
LoadField instruction that extracts the untagged payload address
is inserted before the instruction and the corresponding input is
rebound to that LoadField instruction.
Once all compiler passes that involve code motion have been performed,
a new pass looks for LoadIndexed, StoreIndexed, or MemoryCopy where
the cid stored in the instruction for the array is a typed data cid.
In these cases, if the array is not an internal typed data object,
then the payload address is extracted. Waiting until this point ensures
that no GC-triggering instructions are inserted between the extraction
of the payload address and the use. (Internal typed data objects are
left as-is because the payload address is inside the object itself
and doesn't require indirection through the data field of the object).
This CL also replaces code conditional on the array cid with code
that is instead conditional on the array element representation in
cases where it makes sense to do so, since this is a less brittle
check than checking the array cid (e.g., checking for kUnboxedInt8
to load, store, or copy a signed byte from an array instead of
listing all possible array cids that store signed bytes).
This CL also fixes an issue with the ARM64 assembler where calling
LoadFromOffset with an Address that has a non-Offset type would
silently generate bad code instead of triggering the ASSERT in
PrepareLargeOffset.
TEST=vm/dart/typed_list_index_checkbound_il_test
Issue: https://github.com/dart-lang/sdk/issues/54710
Cq-Include-Trybots: luci.dart.try:vm-aot-android-release-arm64c-try,vm-aot-android-release-arm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-aot-obfuscate-linux-release-x64-try,vm-aot-optimization-level-linux-release-x64-try,vm-aot-win-debug-arm64-try,vm-appjit-linux-debug-x64-try,vm-asan-linux-release-x64-try,vm-checked-mac-release-arm64-try,vm-eager-optimization-linux-release-ia32-try,vm-eager-optimization-linux-release-x64-try,vm-ffi-android-debug-arm-try,vm-ffi-android-debug-arm64c-try,vm-ffi-qemu-linux-release-arm-try,vm-ffi-qemu-linux-release-riscv64-try,vm-fuchsia-release-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-msan-linux-release-x64-try,vm-reload-linux-debug-x64-try,vm-reload-rollback-linux-debug-x64-try,vm-ubsan-linux-release-x64-try,vm-win-debug-arm64-try,vm-win-debug-x64-try,vm-win-release-ia32-try
Change-Id: I25b5f314943e9254d3d28986d720a5d47f12feeb
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/352363
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2024-03-22 10:12:39 +00:00
|
|
|
ASSERT(IsTypedDataUint8ArrayClassId(dst.ptr()->GetClassId()));
|
|
|
|
// The native entry should only be called when clamping is needed. When the
|
|
|
|
// source has uint8 elements, a direct memory move should be used instead.
|
|
|
|
ASSERT(!IsTypedDataUint8ArrayClassId(src.ptr()->GetClassId()));
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph is preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Utf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
|
[vm/compiler] Move setRange bounds checking entirely into Dart.
The bounds checking was implemented in Dart previously, but this
removes _checkSetRangeArguments, inlining it into
_TypedListBase.setRange, renames _checkBoundsAndMemcpyN to _memMoveN
since it no longer performs bounds checking, and also removes the now
unneeded bounds checking from the native function TypedData_setRange.
TEST=co19{,_2}/LibTest/typed_data lib{,_2}/typed_data
corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
Change-Id: I85ec751708f603f68729f4109d7339dd8407ae77
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324102
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-05 17:10:51 +00:00
|
|
|
NoSafepointScope no_safepoint;
|
|
|
|
uint8_t* dst_data =
|
|
|
|
reinterpret_cast<uint8_t*>(dst.DataAddr(dst_start_in_bytes));
|
|
|
|
int8_t* src_data =
|
|
|
|
reinterpret_cast<int8_t*>(src.DataAddr(src_start_in_bytes));
|
|
|
|
for (intptr_t ix = 0; ix < length_in_bytes; ix++) {
|
|
|
|
int8_t v = *src_data;
|
|
|
|
if (v < 0) v = 0;
|
|
|
|
*dst_data = v;
|
|
|
|
src_data++;
|
|
|
|
dst_data++;
|
2013-03-15 13:33:11 +00:00
|
|
|
}
|
2014-03-11 16:56:55 +00:00
|
|
|
|
[vm/compiler] Further optimize setRange on TypedData receivers.
When setRange is called on a TypedData receiver and the source is also
a TypedData object with the same element size and clamping is not
required, the VM implementation now calls _boundsCheckAndMemcpyN for
element size N. The generated IL for these methods performs the copy
using the MemoryCopy instruction (mostly, see the note below).
Since the two TypedData objects might have the same underlying
buffer, the CL adds a can_overlap flag to the MemoryCopy instruction
which checks for overlapping regions. If can_overlap is set, then
the copy is performed backwards instead of forwards when needed
to ensure that elements of the source region are read before
they are overwritten.
The existing uses of the MemoryCopy instruction are adjusted as
follows:
* The IL generated for copyRangeFromUint8ListToOneByteString
passes false for can_overlap, as all uses currently ensure that
the OneByteString is non-external and thus cannot overlap.
* The IL generated for _memCopy, used by the FFI library, passes
true for can_overlap, as there is no guarantee that the regions
pointed at by the Pointer objects do not overlap.
The MemoryCopy instruction has also been adjusted so that all numeric
inputs (the two start offsets and the length) are either boxed or
unboxed instead of just the length. This exposed an issue
in the inliner, where unboxed constants in the callee graph were
replaced with boxed constants when inlining into the caller graph,
since withList calls setRange with constant starting offsets of 0.
Now the representation of constants in the callee graph is preserved
when inlining the callee graph into the caller graph.
Fixes https://github.com/dart-lang/sdk/issues/51237 by using TMP
and TMP2 for the LDP/STP calls in the 16-byte element size case, so no
temporaries need to be allocated for the instruction.
On ARM when not unrolling the memory copy loop, uses TMP and a single
additional temporary for LDM/STM calls in the 8-byte and 16-byte
element cases, with the latter just using two LDM/STM calls within
the loop, a different approach than the one described in
https://github.com/dart-lang/sdk/issues/51229 .
Note: Once the number of elements being copied reaches a certain
threshold (1048576 on X86, 256 otherwise), _boundsCheckAndMemcpyN
instead calls _nativeSetRange, which is a native call that uses memmove
from the standard C library for non-clamped inputs. It does this
because the code currently emitted for MemoryCopy performs poorly
compared to the more optimized memmove implementation when copying
larger regions of memory.
Notable benchmark changes for dart-aot:
* X64
* TypedDataDuplicate.*.fromList improvement from ~13%-~250%
* Utf8Encode.*.10 improvement from ~50%-~75%
* MapCopy.Map.*.of.Map.* improvement from ~13%-~65%
* MemoryCopy.*.setRange.* improvement from ~13%-~500%
* ARM7
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~6%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~100%-~200% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
* ARM8
* Utf8Encode.*.10 improvement from ~35%-~70%
* MapCopy.Map.*.of.Map.* improvement from ~7%-~75%
* MemoryCopy.*.setRange.{8,64} improvement from ~22%-~500%
* Improvement of ~75%-~160% for MemoryCopy.512.setRange.*.Double
* Regression of ~40% for MemoryCopy.512.setRange.*.Uint8
* Regression of ~85% for MemoryCopy.4096.setRange.*.Uint8
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data,
lib{,_2}/typed_data, corelib{,_2}/list_test
Issue: https://github.com/dart-lang/sdk/issues/42072
Issue: b/294114694
Issue: b/259315681
Change-Id: Ic75521c5fe10b952b5b9ce5f2020c7e3f03672a9
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-ia32-try,vm-linux-debug-simriscv64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-mac-debug-x64-try,vm-aot-linux-release-simarm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-aot-mac-release-x64-try,vm-ffi-qemu-linux-release-riscv64-try,vm-ffi-qemu-linux-release-arm-try,vm-aot-msan-linux-release-x64-try,vm-msan-linux-release-x64-try,vm-aot-tsan-linux-release-x64-try,vm-tsan-linux-release-x64-try,vm-linux-release-ia32-try,vm-linux-release-simarm-try,vm-linux-release-simarm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-mac-release-x64-try,vm-kernel-precomp-linux-release-x64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/319521
Reviewed-by: Daco Harkes <dacoharkes@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Tess Strickland <sstrickl@google.com>
2023-09-04 14:38:27 +00:00
|
|
|
return Object::null();
|
2013-03-15 13:33:11 +00:00
|
|
|
}
|
|
|
|
|
[vm/compiler] Convert _TypedList get and set methods to normal methods.
Previously, they were implemented as native methods with special
replacements in the inliner.
Instead, create force-compiled versions of the original inliner
replacements and use those instead of native methods, unless the
flow graph compiler doesn't support unboxing the requested element type.
In that case, the force-compiled version just calls a native method,
and we only keep the native methods that might be needed (that is,
for double/SIMD element access).
Also, revert the change in 26911a6176ed84, since now the _getX/_setX
methods are appropriately inlined instead of failing to inline due
to being native methods.
TEST=vm/dart/typed_list_index_checkbound_il_test
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-x64-try,vm-aot-linux-release-x64-try,vm-linux-debug-x64-try,vm-aot-linux-release-simarm_x64-try,vm-linux-release-simarm-try,vm-ffi-qemu-linux-release-arm-try
Change-Id: I4840883d1fc12b36a450803da339406bec149044
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/330786
Commit-Queue: Tess Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2024-01-12 10:29:09 +00:00
|
|
|
// The native getter and setter functions defined here are only called if
|
|
|
|
// unboxing doubles or SIMD values is not supported by the flow graph compiler,
|
|
|
|
// and the provided offsets have already been range checked by the calling code.
|
|
|
|
|
|
|
|
// Defines a native entry named TypedData_<getter> (through
// DEFINE_NATIVE_ENTRY) that reads one element out of a typed data object.
//   getter - pasted after "TypedData_" to form the entry name, and also used
//            as the name of the method invoked on the array to read the value.
//   object - class whose `ctor` is called to build the returned result.
//   ctor   - name of the function on `object` that wraps the value read.
// Native argument 0 is fetched as a TypedDataBase (`array`) and native
// argument 1 as a Smi (`offsetInBytes`); the Smi's Value() is passed to the
// getter as the offset.
// NOTE(review): GET_NON_NULL_NATIVE_ARGUMENT is defined elsewhere; presumably
// it rejects null or wrongly typed arguments -- confirm against its definition.
#define TYPED_DATA_GETTER(getter, object, ctor) \
|
Revert "[vm/compiler] Perform inlining of _TypedList._getX in AOT."
This reverts commit 6673f84d59fbd921a70fc91da7f0aeb1c18e9ec5.
Reason for revert: does not honor SupportsUnboxedSimd128(), breaking RISC-V
Original change's description:
> [vm/compiler] Perform inlining of _TypedList._getX in AOT.
>
> Before, the inliner only replaced calls to the _TypedList._getX methods
> with specialized IL if speculation was allowed. This means that the
> inlining would not happen in AOT mode, even though the generated IL
> does not require speculation.
>
> In addition, this CL replaces the native functions used for the
> base definition of _TypedList._getX and _TypedList._setX with
> versions built in the FlowGraphBuilder. With this, the VM avoids
> the overhead of going to the runtime for a native call when these
> methods are not inlined, which should also reduce the impact of
> a failure to inline.
>
> TEST=vm/dart/inline_TypedList_getUint32
>
> Issue: https://github.com/dart-lang/sdk/issues/53513
> Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
> Change-Id: I66b6b8634b2b9b413fb745f02433eb58f2ff913e
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/325703
> Reviewed-by: Martin Kustermann <kustermann@google.com>
> Commit-Queue: Tess Strickland <sstrickl@google.com>
> Reviewed-by: Alexander Markov <alexmarkov@google.com>
Issue: https://github.com/dart-lang/sdk/issues/53513
Change-Id: If3a224e184f084fbe5d059cf036b2c2fb72cd57b
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/327802
Auto-Submit: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Alexander Markov <alexmarkov@google.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
2023-09-25 21:56:57 +00:00
|
|
|
DEFINE_NATIVE_ENTRY(TypedData_##getter, 0, 2) { \
|
|
|
|
GET_NON_NULL_NATIVE_ARGUMENT(TypedDataBase, array, \
|
|
|
|
arguments->NativeArgAt(0)); \
|
|
|
|
GET_NON_NULL_NATIVE_ARGUMENT(Smi, offsetInBytes, \
|
|
|
|
arguments->NativeArgAt(1)); \
|
|
|
|
return object::ctor(array.getter(offsetInBytes.Value())); \
|
|
|
|
}
|
|
|
|
|
[vm/compiler] Convert _TypedList get and set methods to normal methods.
Previously, they were implemented as native methods with special
replacements in the inliner.
Instead, create force-compiled versions of the original inliner
replacements and use those instead of native methods, unless the
flow graph compiler doesn't support unboxing the requested element type.
In that case, the force-compiled version just calls a native method,
and we only keep the native methods that might be needed (that is,
for double/SIMD element access).
Also, revert the change in 26911a6176ed84, since now the _getX/_setX
methods are appropriately inlined instead of failing to inline due
to being native methods.
TEST=vm/dart/typed_list_index_checkbound_il_test
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-x64-try,vm-aot-linux-release-x64-try,vm-linux-debug-x64-try,vm-aot-linux-release-simarm_x64-try,vm-linux-release-simarm-try,vm-ffi-qemu-linux-release-arm-try
Change-Id: I4840883d1fc12b36a450803da339406bec149044
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/330786
Commit-Queue: Tess Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2024-01-12 10:29:09 +00:00
|
|
|
// Defines a native entry named TypedData_<setter> (through
// DEFINE_NATIVE_ENTRY) that stores one element into a typed data object.
//   setter           - pasted after "TypedData_" to form the entry name, and
//                      also the name of the method invoked on the array.
//   object           - type used to fetch native argument 2 (the new value).
//   get_object_value - accessor called on that value to extract what is stored.
//   access_type      - type the extracted value is static_cast to before it
//                      is handed to the setter.
// Native argument 0 is fetched as a TypedDataBase (`array`) and native
// argument 1 as a Smi (`offsetInBytes`). The entry always returns
// Object::null().
#define TYPED_DATA_SETTER(setter, object, get_object_value, access_type) \
|
Revert "[vm/compiler] Perform inlining of _TypedList._getX in AOT."
This reverts commit 6673f84d59fbd921a70fc91da7f0aeb1c18e9ec5.
Reason for revert: does not honor SupportsUnboxedSimd128(), breaking RISC-V
Original change's description:
> [vm/compiler] Perform inlining of _TypedList._getX in AOT.
>
> Before, the inliner only replaced calls to the _TypedList._getX methods
> with specialized IL if speculation was allowed. This means that the
> inlining would not happen in AOT mode, even though the generated IL
> does not require speculation.
>
> In addition, this CL replaces the native functions used for the
> base definition of _TypedList._getX and _TypedList._setX with
> versions built in the FlowGraphBuilder. With this, the VM avoids
> the overhead of going to the runtime for a native call when these
> methods are not inlined, which should also reduce the impact of
> a failure to inline.
>
> TEST=vm/dart/inline_TypedList_getUint32
>
> Issue: https://github.com/dart-lang/sdk/issues/53513
> Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
> Change-Id: I66b6b8634b2b9b413fb745f02433eb58f2ff913e
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/325703
> Reviewed-by: Martin Kustermann <kustermann@google.com>
> Commit-Queue: Tess Strickland <sstrickl@google.com>
> Reviewed-by: Alexander Markov <alexmarkov@google.com>
Issue: https://github.com/dart-lang/sdk/issues/53513
Change-Id: If3a224e184f084fbe5d059cf036b2c2fb72cd57b
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/327802
Auto-Submit: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Alexander Markov <alexmarkov@google.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
2023-09-25 21:56:57 +00:00
|
|
|
DEFINE_NATIVE_ENTRY(TypedData_##setter, 0, 3) { \
|
|
|
|
GET_NON_NULL_NATIVE_ARGUMENT(TypedDataBase, array, \
|
|
|
|
arguments->NativeArgAt(0)); \
|
|
|
|
GET_NON_NULL_NATIVE_ARGUMENT(Smi, offsetInBytes, \
|
|
|
|
arguments->NativeArgAt(1)); \
|
|
|
|
GET_NON_NULL_NATIVE_ARGUMENT(object, value, arguments->NativeArgAt(2)); \
|
|
|
|
array.setter(offsetInBytes.Value(), \
|
|
|
|
static_cast<access_type>(value.get_object_value())); \
|
|
|
|
return Object::null(); \
|
|
|
|
}
|
|
|
|
|
|
|
|
// Expands to a getter/setter pair of native entries for one element type:
// TYPED_DATA_GETTER is instantiated with the name Get<type_name> and
// TYPED_DATA_SETTER with the name Set<type_name>. The `object`, `ctor`,
// `get_object_value` and `access_type` parameters are forwarded unchanged to
// those two macros.
#define TYPED_DATA_NATIVES(type_name, object, ctor, get_object_value, \
|
[vm/compiler] Convert _TypedList get and set methods to normal methods.
Previously, they were implemented as native methods with special
replacements in the inliner.
Instead, create force-compiled versions of the original inliner
replacements and use those instead of native methods, unless the
flow graph compiler doesn't support unboxing the requested element type.
In that case, the force-compiled version just calls a native method,
and we only keep the native methods that might be needed (that is,
for double/SIMD element access).
Also, revert the change in 26911a6176ed84, since now the _getX/_setX
methods are appropriately inlined instead of failing to inline due
to being native methods.
TEST=vm/dart/typed_list_index_checkbound_il_test
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-x64-try,vm-aot-linux-release-x64-try,vm-linux-debug-x64-try,vm-aot-linux-release-simarm_x64-try,vm-linux-release-simarm-try,vm-ffi-qemu-linux-release-arm-try
Change-Id: I4840883d1fc12b36a450803da339406bec149044
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/330786
Commit-Queue: Tess Strickland <sstrickl@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
2024-01-12 10:29:09 +00:00
|
|
|
access_type) \
|
|
|
|
TYPED_DATA_GETTER(Get##type_name, object, ctor) \
|
|
|
|
TYPED_DATA_SETTER(Set##type_name, object, get_object_value, access_type)
|
|
|
|
|
|
|
|
// Instantiate the Get*/Set* native entries for the floating-point and SIMD
// element types. Float32 and Float64 both use Double as the object class and
// differ only in the type the stored value is cast to (float vs. double); the
// three SIMD element types each use their own class and are cast to
// simd128_value_t.
TYPED_DATA_NATIVES(Float32, Double, New, value, float)
|
|
|
|
TYPED_DATA_NATIVES(Float64, Double, New, value, double)
|
|
|
|
TYPED_DATA_NATIVES(Float32x4, Float32x4, New, value, simd128_value_t)
|
|
|
|
TYPED_DATA_NATIVES(Int32x4, Int32x4, New, value, simd128_value_t)
|
|
|
|
TYPED_DATA_NATIVES(Float64x2, Float64x2, New, value, simd128_value_t)
|
Revert "[vm/compiler] Perform inlining of _TypedList._getX in AOT."
This reverts commit 6673f84d59fbd921a70fc91da7f0aeb1c18e9ec5.
Reason for revert: does not honor SupportsUnboxedSimd128(), breaking RISC-V
Original change's description:
> [vm/compiler] Perform inlining of _TypedList._getX in AOT.
>
> Before, the inliner only replaced calls to the _TypedList._getX methods
> with specialized IL if speculation was allowed. This means that the
> inlining would not happen in AOT mode, even though the generated IL
> does not require speculation.
>
> In addition, this CL replaces the native functions used for the
> base definition of _TypedList._getX and _TypedList._setX with
> versions built in the FlowGraphBuilder. With this, the VM avoids
> the overhead of going to the runtime for a native call when these
> methods are not inlined, which should also reduce the impact of
> a failure to inline.
>
> TEST=vm/dart/inline_TypedList_getUint32
>
> Issue: https://github.com/dart-lang/sdk/issues/53513
> Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
> Change-Id: I66b6b8634b2b9b413fb745f02433eb58f2ff913e
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/325703
> Reviewed-by: Martin Kustermann <kustermann@google.com>
> Commit-Queue: Tess Strickland <sstrickl@google.com>
> Reviewed-by: Alexander Markov <alexmarkov@google.com>
Issue: https://github.com/dart-lang/sdk/issues/53513
Change-Id: If3a224e184f084fbe5d059cf036b2c2fb72cd57b
Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-kernel-linux-debug-x64-try,vm-kernel-precomp-linux-debug-x64-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-mac-debug-arm64-try,vm-aot-linux-release-simarm_x64-try,vm-aot-linux-release-x64-try,vm-aot-mac-release-arm64-try,vm-linux-release-x64-try,vm-mac-release-arm64-try,vm-kernel-precomp-linux-release-x64-try
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/327802
Auto-Submit: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
Commit-Queue: Alexander Markov <alexmarkov@google.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
2023-09-25 21:56:57 +00:00
|
|
|
|
2013-03-07 21:50:33 +00:00
|
|
|
} // namespace dart
|