mirror of
https://github.com/dart-lang/sdk
synced 2024-11-02 10:10:22 +00:00
[vm/simd] Add Float64x2 clamp simd implementation
Fixes https://github.com/dart-lang/sdk/issues/40427

TEST=ci, float64x2_clamp_test

Change-Id: I12618c37135feecffb115ce4aca02af1ecb03167
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/243848
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
This commit is contained in:
parent
2783cca57d
commit
2a12dd3315
11 changed files with 222 additions and 0 deletions
|
@ -9433,6 +9433,7 @@ class LoadThreadInstr : public TemplateDefinition<0, NoThrow, Pure> {
|
|||
M(1, _, Float32x4Abs, (Float32x4), Float32x4) \
|
||||
M(1, _, Float64x2Abs, (Float64x2), Float64x2) \
|
||||
M(3, _, Float32x4Clamp, (Float32x4, Float32x4, Float32x4), Float32x4) \
|
||||
M(3, _, Float64x2Clamp, (Float64x2, Float64x2, Float64x2), Float64x2) \
|
||||
M(1, _, Float64x2GetX, (Float64x2), Double) \
|
||||
M(1, _, Float64x2GetY, (Float64x2), Double) \
|
||||
M(2, _, Float64x2WithX, (Float64x2, Double), Float64x2) \
|
||||
|
|
|
@ -5259,6 +5259,36 @@ DEFINE_EMIT(
|
|||
__ vmaxqs(result, result, lower);
|
||||
}
|
||||
|
||||
// Emits the Float64x2 clamp sequence for the QRegisterView-based backend.
// The operands are processed one double at a time: the same compare /
// conditional-move sequence is emitted first for the d(0) halves and then
// for the d(1) halves of `left`, `lower` and `upper`, writing into `result`.
DEFINE_EMIT(Float64x2Clamp,
|
||||
(QRegisterView result,
|
||||
QRegisterView left,
|
||||
QRegisterView lower,
|
||||
QRegisterView upper)) {
|
||||
// One skip target per lane; each label is bound right after the sequence
// of the lane it belongs to.
compiler::Label done0, done1;
|
||||
// result = max(min(left, upper), lower) |
|
||||
// lower if (upper is NaN || left is NaN) |
|
||||
// upper if lower is NaN
// NOTE(review): on the unordered path the LT move below copies `left` into
// the result and the VS branch skips the lower-bound compare entirely;
// confirm that this really produces the NaN results listed above.
|
||||
// Lane 0, step 1: result = min(left, upper).
__ vcmpd(left.d(0), upper.d(0));
|
||||
// Presumably transfers the floating-point compare flags so that the
// conditional moves and branch below can test them - TODO confirm.
__ vmstat();
|
||||
__ vmovd(result.d(0), upper.d(0), GE);
|
||||
__ vmovd(result.d(0), left.d(0), LT); // less than or unordered(NaN)
|
||||
__ b(&done0, VS); // at least one argument was NaN
|
||||
// Lane 0, step 2: replace the result by lower when result <= lower.
__ vcmpd(result.d(0), lower.d(0));
|
||||
__ vmstat();
|
||||
__ vmovd(result.d(0), lower.d(0), LE);
|
||||
__ Bind(&done0);
|
||||
|
||||
// Lane 1, step 1: result = min(left, upper).
__ vcmpd(left.d(1), upper.d(1));
|
||||
__ vmstat();
|
||||
__ vmovd(result.d(1), upper.d(1), GE);
|
||||
__ vmovd(result.d(1), left.d(1), LT); // less than or unordered(NaN)
|
||||
__ b(&done1, VS); // at least one argument was NaN
|
||||
// Lane 1, step 2: replace the result by lower when result <= lower.
__ vcmpd(result.d(1), lower.d(1));
|
||||
__ vmstat();
|
||||
__ vmovd(result.d(1), lower.d(1), LE);
|
||||
__ Bind(&done1);
|
||||
}
|
||||
|
||||
// Low (< 7) Q registers are needed for the vmovs instruction.
|
||||
// TODO(dartbug.com/30953) support register range constraints in the regalloc.
|
||||
DEFINE_EMIT(Float32x4With,
|
||||
|
@ -5564,6 +5594,7 @@ DEFINE_EMIT(Int32x4WithFlag,
|
|||
CASE(Int32x4ToFloat32x4) \
|
||||
____(Simd32x4ToSimd32x4Convertion) \
|
||||
SIMPLE(Float32x4Clamp) \
|
||||
SIMPLE(Float64x2Clamp) \
|
||||
CASE(Float32x4WithX) \
|
||||
CASE(Float32x4WithY) \
|
||||
CASE(Float32x4WithZ) \
|
||||
|
|
|
@ -4427,6 +4427,13 @@ DEFINE_EMIT(
|
|||
__ vmaxs(result, result, lower);
|
||||
}
|
||||
|
||||
// Float64x2 clamp for the VRegister-based backend:
//   out = max(min(input, hi), lo)
// Because the upper bound is applied first, the lower bound wins whenever
// the two bounds are inverted.
// NOTE(review): assumes vmind/vmaxd operate on both 64-bit lanes - confirm.
DEFINE_EMIT(Float64x2Clamp,
            (VRegister out, VRegister input, VRegister lo, VRegister hi)) {
  __ vmind(out, input, hi);  // cap at the upper bound
  __ vmaxd(out, out, lo);    // then raise to the lower bound
}
|
||||
|
||||
DEFINE_EMIT(Float32x4With,
|
||||
(VRegister result, VRegister replacement, VRegister value)) {
|
||||
__ fcvtsd(VTMP, replacement);
|
||||
|
@ -4619,6 +4626,8 @@ DEFINE_EMIT(Int32x4WithFlag,
|
|||
____(SimdZero) \
|
||||
CASE(Float32x4Clamp) \
|
||||
____(Float32x4Clamp) \
|
||||
CASE(Float64x2Clamp) \
|
||||
____(Float64x2Clamp) \
|
||||
CASE(Float32x4WithX) \
|
||||
CASE(Float32x4WithY) \
|
||||
CASE(Float32x4WithZ) \
|
||||
|
|
|
@ -4578,6 +4578,15 @@ DEFINE_EMIT(Float32x4Clamp,
|
|||
__ maxps(left, lower);
|
||||
}
|
||||
|
||||
// Float64x2 clamp on XMM registers. The emitter is declared
// SameAsFirstInput, so the first operand is clamped in place:
//   operand = max(min(operand, max_bound), min_bound)
DEFINE_EMIT(Float64x2Clamp,
            (SameAsFirstInput,
             XmmRegister operand,
             XmmRegister min_bound,
             XmmRegister max_bound)) {
  __ minpd(operand, max_bound);  // cap at the upper bound
  __ maxpd(operand, min_bound);  // then raise to the lower bound
}
|
||||
|
||||
DEFINE_EMIT(Int32x4FromInts,
|
||||
(XmmRegister result, Register, Register, Register, Register)) {
|
||||
// TODO(dartbug.com/30949) avoid transfer through memory.
|
||||
|
@ -4727,6 +4736,7 @@ DEFINE_EMIT(Int32x4Select,
|
|||
SIMPLE(Float32x4Zero) \
|
||||
SIMPLE(Float64x2Zero) \
|
||||
SIMPLE(Float32x4Clamp) \
|
||||
SIMPLE(Float64x2Clamp) \
|
||||
CASE(Int32x4GetFlagX) \
|
||||
CASE(Int32x4GetFlagY) \
|
||||
CASE(Int32x4GetFlagZ) \
|
||||
|
|
|
@ -4722,6 +4722,12 @@ DEFINE_EMIT(
|
|||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// Float64x2 clamp has no implementation for the FRegister-based backend yet;
// the emitter only reports UNIMPLEMENTED(). The operands are left unnamed
// because none of them is used.
DEFINE_EMIT(Float64x2Clamp,
            (FRegister /* result */,
             FRegister /* value */,
             FRegister /* lower */,
             FRegister /* upper */)) {
  UNIMPLEMENTED();
}
|
||||
|
||||
DEFINE_EMIT(Float32x4With,
|
||||
(FRegister result, FRegister replacement, FRegister value)) {
|
||||
UNIMPLEMENTED();
|
||||
|
@ -4821,6 +4827,8 @@ DEFINE_EMIT(Int32x4WithFlag,
|
|||
____(SimdZero) \
|
||||
CASE(Float32x4Clamp) \
|
||||
____(Float32x4Clamp) \
|
||||
CASE(Float64x2Clamp) \
|
||||
____(Float64x2Clamp) \
|
||||
CASE(Float32x4WithX) \
|
||||
CASE(Float32x4WithY) \
|
||||
CASE(Float32x4WithZ) \
|
||||
|
|
|
@ -4806,6 +4806,15 @@ DEFINE_EMIT(Float32x4Clamp,
|
|||
__ maxps(value, lower);
|
||||
}
|
||||
|
||||
// Float64x2 clamp on XMM registers. With SameAsFirstInput the first operand
// is both the input and the output, so it is narrowed in place:
//   in_out = max(min(in_out, hi), lo)
DEFINE_EMIT(Float64x2Clamp,
            (SameAsFirstInput,
             XmmRegister in_out,
             XmmRegister lo,
             XmmRegister hi)) {
  __ minpd(in_out, hi);  // cap at the upper bound
  __ maxpd(in_out, lo);  // then raise to the lower bound
}
|
||||
|
||||
DEFINE_EMIT(Int32x4FromInts,
|
||||
(XmmRegister result, Register, Register, Register, Register)) {
|
||||
// TODO(dartbug.com/30949) avoid transfer through memory.
|
||||
|
@ -4953,6 +4962,7 @@ DEFINE_EMIT(Int32x4Select,
|
|||
SIMPLE(Float32x4Zero) \
|
||||
SIMPLE(Float64x2Zero) \
|
||||
SIMPLE(Float32x4Clamp) \
|
||||
SIMPLE(Float64x2Clamp) \
|
||||
CASE(Int32x4GetFlagX) \
|
||||
CASE(Int32x4GetFlagY) \
|
||||
____(Int32x4GetFlagXorY) \
|
||||
|
|
|
@ -4072,6 +4072,7 @@ bool FlowGraphInliner::TryInlineRecognizedMethod(
|
|||
case MethodRecognizer::kFloat32x4WithZ:
|
||||
case MethodRecognizer::kFloat32x4Zero:
|
||||
case MethodRecognizer::kFloat64x2Abs:
|
||||
case MethodRecognizer::kFloat64x2Clamp:
|
||||
case MethodRecognizer::kFloat64x2FromDoubles:
|
||||
case MethodRecognizer::kFloat64x2GetSignMask:
|
||||
case MethodRecognizer::kFloat64x2GetX:
|
||||
|
|
|
@ -185,6 +185,7 @@ namespace dart {
|
|||
V(_Float64x2, get:y, Float64x2GetY, 0x27cae053) \
|
||||
V(_Float64x2, unary-, Float64x2Negate, 0x958a0d28) \
|
||||
V(_Float64x2, abs, Float64x2Abs, 0x9a24c75e) \
|
||||
V(_Float64x2, clamp, Float64x2Clamp, 0xfddc1533) \
|
||||
V(_Float64x2, sqrt, Float64x2Sqrt, 0x93d543c8) \
|
||||
V(_Float64x2, get:signMask, Float64x2GetSignMask, 0x7c6b11ea) \
|
||||
V(_Float64x2, scale, Float64x2Scale, 0x52959118) \
|
||||
|
|
|
@ -4031,6 +4031,8 @@ class _Float64x2 implements Float64x2 {
|
|||
@pragma("vm:exact-result-type", _Float64x2)
|
||||
@pragma("vm:external-name", "Float64x2_abs")
|
||||
external Float64x2 abs();
|
||||
@pragma("vm:recognized", "other")
|
||||
@pragma("vm:exact-result-type", _Float64x2)
|
||||
@pragma("vm:external-name", "Float64x2_clamp")
|
||||
external Float64x2 clamp(Float64x2 lowerLimit, Float64x2 upperLimit);
|
||||
@pragma("vm:recognized", "other")
|
||||
|
|
74
tests/lib/typed_data/float64x2_clamp_test.dart
Normal file
74
tests/lib/typed_data/float64x2_clamp_test.dart
Normal file
|
@ -0,0 +1,74 @@
|
|||
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
||||
// for details. All rights reserved. Use of this source code is governed by a
|
||||
// BSD-style license that can be found in the LICENSE file.
|
||||
// VMOptions=--intrinsify --optimization-counter-threshold=10 --no-background-compilation
|
||||
// VMOptions=--no-intrinsify --optimization-counter-threshold=10 --no-background-compilation
|
||||
|
||||
// Library tag to be able to run in html test framework.
|
||||
library float64x2_clamp_test;
|
||||
|
||||
import 'dart:math';
|
||||
import 'dart:typed_data';
|
||||
import 'package:expect/expect.dart';
|
||||
|
||||
/// Clamping zero with an inverted range (lower bound 1.0 above upper bound
/// -1.0) must yield the lower bound in both lanes.
void testClampLowerGreaterThanUpper() {
  final lower = Float64x2(1.0, 1.0);
  final upper = Float64x2(-1.0, -1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes a
  // failure report label the right value as the expectation.
  Expect.equals(1.0, clamped.x);
  Expect.equals(1.0, clamped.y);
}
|
||||
|
||||
/// Zero already lies inside [-1.0, 1.0], so clamping must leave both lanes at
/// 0.0.
void testClamp() {
  final lower = Float64x2(-1.0, -1.0);
  final upper = Float64x2(1.0, 1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes a
  // failure report label the right value as the expectation.
  Expect.equals(0.0, clamped.x);
  Expect.equals(0.0, clamped.y);
}
|
||||
|
||||
/// Clamps a vector whose x lane lies below the lower bound and whose y lane
/// lies above the upper bound; each lane must land exactly on its bound.
void testNonZeroClamp() {
  // NOTE(review): 123456.789 ^ 123.1 is far beyond the largest finite double,
  // so this bound appears to evaluate to infinity and the x lane effectively
  // checks clamping of -infinity - confirm that this is intended.
  final double hugeBound = pow(123456.789, 123.1) as double;
  final lower = Float64x2(-hugeBound, -234567.89);
  final upper = Float64x2(hugeBound, 234567.89);
  final value =
      Float64x2(-pow(123456789.123, 123.1) as double, 234567890.123);
  final clamped = value.clamp(lower, upper);
  // The expected x value is the lower bound itself (exponent 123.1, the same
  // one used to build `lower`). Expect.equals takes (expected, actual).
  Expect.equals(-hugeBound, clamped.x);
  Expect.equals(234567.89, clamped.y);
}
|
||||
|
||||
/// Clamps a negated zero vector into the range [negated zero, zero] and
/// returns the result.
Float64x2 negativeZeroClamp() {
  final Float64x2 minusZero = -Float64x2.zero();
  final Float64x2 plusZero = Float64x2.zero();
  return minusZero.clamp(minusZero, plusZero);
}
|
||||
|
||||
/// Clamps a zero vector into the range [-1.0, negated zero] and returns the
/// result.
Float64x2 zeroClamp() {
  final Float64x2 lowerBound = -Float64x2(1.0, 1.0);
  final Float64x2 upperBound = -Float64x2.zero();
  final Float64x2 subject = Float64x2.zero();
  return subject.clamp(lowerBound, upperBound);
}
|
||||
|
||||
/// Re-runs [negativeZeroClamp] and checks that every lane compares as equal
/// (compareTo == 0, which also tells -0.0 and 0.0 apart) to the reference
/// result passed in as [unopt].
void testNegativeZeroClamp(Float64x2 unopt) {
  final current = negativeZeroClamp();
  // Expect.equals takes (expected, actual).
  Expect.equals(0, current.x.compareTo(unopt.x));
  Expect.equals(0, current.y.compareTo(unopt.y));
}
|
||||
|
||||
/// Re-runs [zeroClamp] and checks that every lane compares as equal
/// (compareTo == 0, which also tells -0.0 and 0.0 apart) to the reference
/// result passed in as [unopt].
void testZeroClamp(Float64x2 unopt) {
  final current = zeroClamp();
  // Expect.equals takes (expected, actual).
  Expect.equals(0, current.x.compareTo(unopt.x));
  Expect.equals(0, current.y.compareTo(unopt.y));
}
|
||||
|
||||
/// Test entry point: records reference results for the signed-zero clamps
/// once, then runs every check 2000 times.
main() {
  // Captured before the loop starts; presumably these come from unoptimized
  // code (see the `unopt` naming and the low optimization-counter-threshold
  // in the VMOptions header) - confirm.
  final unoptNegZeroClamp = negativeZeroClamp();
  final unoptZeroClamp = zeroClamp();

  var round = 0;
  while (round < 2000) {
    testClampLowerGreaterThanUpper();
    testClamp();
    testNonZeroClamp();
    testNegativeZeroClamp(unoptNegZeroClamp);
    testZeroClamp(unoptZeroClamp);
    round++;
  }
}
|
75
tests/lib_2/typed_data/float64x2_clamp_test.dart
Normal file
75
tests/lib_2/typed_data/float64x2_clamp_test.dart
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
||||
// for details. All rights reserved. Use of this source code is governed by a
|
||||
// BSD-style license that can be found in the LICENSE file.
|
||||
// VMOptions=--optimization-counter-threshold=10 --no-background-compilation
|
||||
|
||||
// @dart = 2.9
|
||||
|
||||
// Library tag to be able to run in html test framework.
|
||||
library float64x2_clamp_test;
|
||||
|
||||
import 'dart:math';
|
||||
import 'dart:typed_data';
|
||||
import 'package:expect/expect.dart';
|
||||
|
||||
/// Clamping zero with an inverted range (lower bound 1.0 above upper bound
/// -1.0) must yield the lower bound in both lanes.
void testClampLowerGreaterThanUpper() {
  final lower = Float64x2(1.0, 1.0);
  final upper = Float64x2(-1.0, -1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes a
  // failure report label the right value as the expectation.
  Expect.equals(1.0, clamped.x);
  Expect.equals(1.0, clamped.y);
}
|
||||
|
||||
/// Zero already lies inside [-1.0, 1.0], so clamping must leave both lanes at
/// 0.0.
void testClamp() {
  final lower = Float64x2(-1.0, -1.0);
  final upper = Float64x2(1.0, 1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes a
  // failure report label the right value as the expectation.
  Expect.equals(0.0, clamped.x);
  Expect.equals(0.0, clamped.y);
}
|
||||
|
||||
/// Clamps a vector whose x lane lies below the lower bound and whose y lane
/// lies above the upper bound; each lane must land exactly on its bound.
void testNonZeroClamp() {
  // NOTE(review): 123456.789 ^ 123.1 is far beyond the largest finite double,
  // so this bound appears to evaluate to infinity and the x lane effectively
  // checks clamping of -infinity - confirm that this is intended.
  final double hugeBound = pow(123456.789, 123.1) as double;
  final lower = Float64x2(-hugeBound, -234567.89);
  final upper = Float64x2(hugeBound, 234567.89);
  final value =
      Float64x2(-pow(123456789.123, 123.1) as double, 234567890.123);
  final clamped = value.clamp(lower, upper);
  // The expected x value is the lower bound itself (exponent 123.1, the same
  // one used to build `lower`). Expect.equals takes (expected, actual).
  Expect.equals(-hugeBound, clamped.x);
  Expect.equals(234567.89, clamped.y);
}
|
||||
|
||||
/// Clamps a negated zero vector into the range [negated zero, zero] and
/// returns the result.
Float64x2 negativeZeroClamp() {
  final Float64x2 minusZero = -Float64x2.zero();
  final Float64x2 plusZero = Float64x2.zero();
  return minusZero.clamp(minusZero, plusZero);
}
|
||||
|
||||
/// Clamps a zero vector into the range [-1.0, negated zero] and returns the
/// result.
Float64x2 zeroClamp() {
  final Float64x2 lowerBound = -Float64x2(1.0, 1.0);
  final Float64x2 upperBound = -Float64x2.zero();
  final Float64x2 subject = Float64x2.zero();
  return subject.clamp(lowerBound, upperBound);
}
|
||||
|
||||
/// Re-runs [negativeZeroClamp] and checks that every lane compares as equal
/// (compareTo == 0, which also tells -0.0 and 0.0 apart) to the reference
/// result passed in as [unopt].
void testNegativeZeroClamp(Float64x2 unopt) {
  final current = negativeZeroClamp();
  // Expect.equals takes (expected, actual).
  Expect.equals(0, current.x.compareTo(unopt.x));
  Expect.equals(0, current.y.compareTo(unopt.y));
}
|
||||
|
||||
/// Re-runs [zeroClamp] and checks that every lane compares as equal
/// (compareTo == 0, which also tells -0.0 and 0.0 apart) to the reference
/// result passed in as [unopt].
void testZeroClamp(Float64x2 unopt) {
  final current = zeroClamp();
  // Expect.equals takes (expected, actual).
  Expect.equals(0, current.x.compareTo(unopt.x));
  Expect.equals(0, current.y.compareTo(unopt.y));
}
|
||||
|
||||
/// Test entry point: records reference results for the signed-zero clamps
/// once, then runs every check 2000 times.
main() {
  // Captured before the loop starts; presumably these come from unoptimized
  // code (see the `unopt` naming and the low optimization-counter-threshold
  // in the VMOptions header) - confirm.
  final unoptNegZeroClamp = negativeZeroClamp();
  final unoptZeroClamp = zeroClamp();

  var round = 0;
  while (round < 2000) {
    testClampLowerGreaterThanUpper();
    testClamp();
    testNonZeroClamp();
    testNegativeZeroClamp(unoptNegZeroClamp);
    testZeroClamp(unoptZeroClamp);
    round++;
  }
}
|
Loading…
Reference in a new issue