[vm/simd] Add Float64x2 clamp simd implementation

Fixes https://github.com/dart-lang/sdk/issues/40427

TEST=ci, float64x2_clamp_test

Change-Id: I12618c37135feecffb115ce4aca02af1ecb03167
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/243848
Reviewed-by: Ryan Macnak <rmacnak@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
This commit is contained in:
Alexander Aprelev 2022-05-10 04:56:25 +00:00 committed by Commit Bot
parent 2783cca57d
commit 2a12dd3315
11 changed files with 222 additions and 0 deletions

View file

@ -9433,6 +9433,7 @@ class LoadThreadInstr : public TemplateDefinition<0, NoThrow, Pure> {
M(1, _, Float32x4Abs, (Float32x4), Float32x4) \
M(1, _, Float64x2Abs, (Float64x2), Float64x2) \
M(3, _, Float32x4Clamp, (Float32x4, Float32x4, Float32x4), Float32x4) \
M(3, _, Float64x2Clamp, (Float64x2, Float64x2, Float64x2), Float64x2) \
M(1, _, Float64x2GetX, (Float64x2), Double) \
M(1, _, Float64x2GetY, (Float64x2), Double) \
M(2, _, Float64x2WithX, (Float64x2, Double), Float64x2) \

View file

@ -5259,6 +5259,36 @@ DEFINE_EMIT(
__ vmaxqs(result, result, lower);
}
// Float64x2 clamp, computed one 64-bit lane at a time with a scalar compare
// followed by conditional moves. Inputs are the value (`left`) and the two
// limits; the clamped lanes are written to `result`.
DEFINE_EMIT(Float64x2Clamp,
(QRegisterView result,
QRegisterView left,
QRegisterView lower,
QRegisterView upper)) {
// One exit label per lane; each is the target used to skip that lane's
// lower-bound step.
compiler::Label done0, done1;
// result = max(min(left, upper), lower) |
// lower if (upper is NaN || left is NaN) |
// upper if lower is NaN
// NOTE(review): in the sequence below the unordered case leaves `left` in
// `result` (the LT move) and then branches past the comparison with `lower`,
// which does not obviously match the NaN rows of the summary above --
// confirm the intended NaN behaviour.
//
// Lane 0: result = min(left, upper).
__ vcmpd(left.d(0), upper.d(0));
// Presumably publishes the FP compare flags so the conditional moves and the
// branch below can test them -- TODO confirm.
__ vmstat();
__ vmovd(result.d(0), upper.d(0), GE);
__ vmovd(result.d(0), left.d(0), LT); // less than or unordered(NaN)
__ b(&done0, VS); // at least one argument was NaN
// Lane 0: replace the capped value with `lower` when it compares LE to it.
__ vcmpd(result.d(0), lower.d(0));
__ vmstat();
__ vmovd(result.d(0), lower.d(0), LE);
__ Bind(&done0);
// Lane 1: same sequence as lane 0, using done1 as the skip target.
__ vcmpd(left.d(1), upper.d(1));
__ vmstat();
__ vmovd(result.d(1), upper.d(1), GE);
__ vmovd(result.d(1), left.d(1), LT); // less than or unordered(NaN)
__ b(&done1, VS); // at least one argument was NaN
__ vcmpd(result.d(1), lower.d(1));
__ vmstat();
__ vmovd(result.d(1), lower.d(1), LE);
__ Bind(&done1);
}
// Low (< 7) Q registers are needed for the vmovs instruction.
// TODO(dartbug.com/30953) support register range constraints in the regalloc.
DEFINE_EMIT(Float32x4With,
@ -5564,6 +5594,7 @@ DEFINE_EMIT(Int32x4WithFlag,
CASE(Int32x4ToFloat32x4) \
____(Simd32x4ToSimd32x4Convertion) \
SIMPLE(Float32x4Clamp) \
SIMPLE(Float64x2Clamp) \
CASE(Float32x4WithX) \
CASE(Float32x4WithY) \
CASE(Float32x4WithZ) \

View file

@ -4427,6 +4427,13 @@ DEFINE_EMIT(
__ vmaxs(result, result, lower);
}
// Float64x2 clamp: result = max(min(value, upper), lower), emitted as a min
// followed by a max (vmind/vmaxd presumably operate on both double lanes).
DEFINE_EMIT(
Float64x2Clamp,
(VRegister result, VRegister value, VRegister lower, VRegister upper)) {
// Cap at the upper limit first...
__ vmind(result, value, upper);
// ...then apply the lower limit last, so it takes precedence when the limits
// are inverted (lower > upper).
__ vmaxd(result, result, lower);
}
DEFINE_EMIT(Float32x4With,
(VRegister result, VRegister replacement, VRegister value)) {
__ fcvtsd(VTMP, replacement);
@ -4619,6 +4626,8 @@ DEFINE_EMIT(Int32x4WithFlag,
____(SimdZero) \
CASE(Float32x4Clamp) \
____(Float32x4Clamp) \
CASE(Float64x2Clamp) \
____(Float64x2Clamp) \
CASE(Float32x4WithX) \
CASE(Float32x4WithY) \
CASE(Float32x4WithZ) \

View file

@ -4578,6 +4578,15 @@ DEFINE_EMIT(Float32x4Clamp,
__ maxps(left, lower);
}
// Float64x2 clamp: left = max(min(left, upper), lower), computed in place.
// SameAsFirstInput presumably ties the result register to `left`, which is
// why the sequence overwrites `left` rather than a separate result register.
// NOTE(review): x86 minpd/maxpd return the second (source) operand when
// either input is NaN, so NaN handling may differ from other backends --
// confirm.
DEFINE_EMIT(Float64x2Clamp,
(SameAsFirstInput,
XmmRegister left,
XmmRegister lower,
XmmRegister upper)) {
// Cap at the upper limit first...
__ minpd(left, upper);
// ...then apply the lower limit last, so it takes precedence when the limits
// are inverted (lower > upper).
__ maxpd(left, lower);
}
DEFINE_EMIT(Int32x4FromInts,
(XmmRegister result, Register, Register, Register, Register)) {
// TODO(dartbug.com/30949) avoid transfer through memory.
@ -4727,6 +4736,7 @@ DEFINE_EMIT(Int32x4Select,
SIMPLE(Float32x4Zero) \
SIMPLE(Float64x2Zero) \
SIMPLE(Float32x4Clamp) \
SIMPLE(Float64x2Clamp) \
CASE(Int32x4GetFlagX) \
CASE(Int32x4GetFlagY) \
CASE(Int32x4GetFlagZ) \

View file

@ -4722,6 +4722,12 @@ DEFINE_EMIT(
UNIMPLEMENTED();
}
// Float64x2 clamp placeholder for this backend: no code is emitted; reaching
// this emitter triggers UNIMPLEMENTED().
DEFINE_EMIT(
Float64x2Clamp,
(FRegister result, FRegister value, FRegister lower, FRegister upper)) {
UNIMPLEMENTED();
}
DEFINE_EMIT(Float32x4With,
(FRegister result, FRegister replacement, FRegister value)) {
UNIMPLEMENTED();
@ -4821,6 +4827,8 @@ DEFINE_EMIT(Int32x4WithFlag,
____(SimdZero) \
CASE(Float32x4Clamp) \
____(Float32x4Clamp) \
CASE(Float64x2Clamp) \
____(Float64x2Clamp) \
CASE(Float32x4WithX) \
CASE(Float32x4WithY) \
CASE(Float32x4WithZ) \

View file

@ -4806,6 +4806,15 @@ DEFINE_EMIT(Float32x4Clamp,
__ maxps(value, lower);
}
// Float64x2 clamp: value = max(min(value, upper), lower), computed in place.
// SameAsFirstInput presumably ties the result register to `value`, which is
// why the sequence overwrites `value` rather than a separate result register.
// NOTE(review): x86 minpd/maxpd return the second (source) operand when
// either input is NaN, so NaN handling may differ from other backends --
// confirm.
DEFINE_EMIT(Float64x2Clamp,
(SameAsFirstInput,
XmmRegister value,
XmmRegister lower,
XmmRegister upper)) {
// Cap at the upper limit first...
__ minpd(value, upper);
// ...then apply the lower limit last, so it takes precedence when the limits
// are inverted (lower > upper).
__ maxpd(value, lower);
}
DEFINE_EMIT(Int32x4FromInts,
(XmmRegister result, Register, Register, Register, Register)) {
// TODO(dartbug.com/30949) avoid transfer through memory.
@ -4953,6 +4962,7 @@ DEFINE_EMIT(Int32x4Select,
SIMPLE(Float32x4Zero) \
SIMPLE(Float64x2Zero) \
SIMPLE(Float32x4Clamp) \
SIMPLE(Float64x2Clamp) \
CASE(Int32x4GetFlagX) \
CASE(Int32x4GetFlagY) \
____(Int32x4GetFlagXorY) \

View file

@ -4072,6 +4072,7 @@ bool FlowGraphInliner::TryInlineRecognizedMethod(
case MethodRecognizer::kFloat32x4WithZ:
case MethodRecognizer::kFloat32x4Zero:
case MethodRecognizer::kFloat64x2Abs:
case MethodRecognizer::kFloat64x2Clamp:
case MethodRecognizer::kFloat64x2FromDoubles:
case MethodRecognizer::kFloat64x2GetSignMask:
case MethodRecognizer::kFloat64x2GetX:

View file

@ -185,6 +185,7 @@ namespace dart {
V(_Float64x2, get:y, Float64x2GetY, 0x27cae053) \
V(_Float64x2, unary-, Float64x2Negate, 0x958a0d28) \
V(_Float64x2, abs, Float64x2Abs, 0x9a24c75e) \
V(_Float64x2, clamp, Float64x2Clamp, 0xfddc1533) \
V(_Float64x2, sqrt, Float64x2Sqrt, 0x93d543c8) \
V(_Float64x2, get:signMask, Float64x2GetSignMask, 0x7c6b11ea) \
V(_Float64x2, scale, Float64x2Scale, 0x52959118) \

View file

@ -4031,6 +4031,8 @@ class _Float64x2 implements Float64x2 {
@pragma("vm:exact-result-type", _Float64x2)
@pragma("vm:external-name", "Float64x2_abs")
external Float64x2 abs();
// Clamp of this vector between [lowerLimit] and [upperLimit]. Declared
// external and bound to the native entry "Float64x2_clamp"; the pragmas mark
// it as a VM-recognized method whose result is exactly a _Float64x2.
@pragma("vm:recognized", "other")
@pragma("vm:exact-result-type", _Float64x2)
@pragma("vm:external-name", "Float64x2_clamp")
external Float64x2 clamp(Float64x2 lowerLimit, Float64x2 upperLimit);
@pragma("vm:recognized", "other")

View file

@ -0,0 +1,74 @@
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// VMOptions=--intrinsify --optimization-counter-threshold=10 --no-background-compilation
// VMOptions=--no-intrinsify --optimization-counter-threshold=10 --no-background-compilation
// Library tag to be able to run in html test framework.
library float64x2_clamp_test;
import 'dart:math';
import 'dart:typed_data';
import 'package:expect/expect.dart';
/// Clamps a zero vector with inverted limits (lower above upper) and checks
/// that the lower limit wins in both lanes.
void testClampLowerGreaterThanUpper() {
  final lower = Float64x2(1.0, 1.0);
  final upper = Float64x2(-1.0, -1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes failure
  // messages label the two values correctly.
  Expect.equals(1.0, clamped.x);
  Expect.equals(1.0, clamped.y);
}
/// Clamps a zero vector that already lies inside [-1, 1] and checks that both
/// lanes are left at zero.
void testClamp() {
  final lower = Float64x2(-1.0, -1.0);
  final upper = Float64x2(1.0, 1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes failure
  // messages label the two values correctly.
  Expect.equals(0.0, clamped.x);
  Expect.equals(0.0, clamped.y);
}
/// Clamps a vector whose lanes lie outside the limits (x below the lower
/// limit, y above the upper limit) and checks that each lane lands on the
/// limit it crossed.
void testNonZeroClamp() {
  // NOTE(review): pow(123456.789, 123.1) is far beyond the finite double
  // range, so lane x appears to exercise infinities -- confirm this is the
  // intended coverage.
  final double bound = pow(123456.789, 123.1) as double;
  final lower = Float64x2(-bound, -234567.89);
  final upper = Float64x2(bound, 234567.89);
  final value =
      Float64x2(-pow(123456789.123, 123.1) as double, 234567890.123);
  final clamped = value.clamp(lower, upper);
  // The expected x lane is derived from the same bound as the lower limit
  // instead of a separately spelled pow() with a different exponent.
  // Expect.equals takes (expected, actual).
  Expect.equals(-bound, clamped.x);
  Expect.equals(234567.89, clamped.y);
}
/// Returns the result of clamping a negated zero vector between itself (as
/// the lower limit) and a plain zero vector (as the upper limit).
Float64x2 negativeZeroClamp() {
  final minusZero = -Float64x2.zero();
  final plusZero = Float64x2.zero();
  return minusZero.clamp(minusZero, plusZero);
}
/// Returns the result of clamping a zero vector between (-1, -1) as the lower
/// limit and a negated zero vector as the upper limit.
Float64x2 zeroClamp() {
  final lowerLimit = -Float64x2(1.0, 1.0);
  final upperLimit = -Float64x2.zero();
  final value = Float64x2.zero();
  return value.clamp(lowerLimit, upperLimit);
}
/// Checks that negativeZeroClamp() still yields the same lanes as the
/// reference result [unopt].
void testNegativeZeroClamp(Float64x2 unopt) {
  final res = negativeZeroClamp();
  // compareTo is used rather than == because it distinguishes -0.0 from 0.0.
  // Expect.equals takes (expected, actual).
  Expect.equals(0, res.x.compareTo(unopt.x));
  Expect.equals(0, res.y.compareTo(unopt.y));
}
/// Checks that zeroClamp() still yields the same lanes as the reference
/// result [unopt].
void testZeroClamp(Float64x2 unopt) {
  final res = zeroClamp();
  // compareTo is used rather than == because it distinguishes -0.0 from 0.0.
  // Expect.equals takes (expected, actual).
  Expect.equals(0, res.x.compareTo(unopt.x));
  Expect.equals(0, res.y.compareTo(unopt.y));
}
/// Captures reference results for the signed-zero cases once, then runs every
/// clamp check 2000 times (presumably enough iterations for the optimizing
/// compiler to kick in under the VMOptions above -- confirm).
main() {
  final referenceNegZero = negativeZeroClamp();
  final referenceZero = zeroClamp();
  var remaining = 2000;
  while (remaining > 0) {
    testClampLowerGreaterThanUpper();
    testClamp();
    testNonZeroClamp();
    testNegativeZeroClamp(referenceNegZero);
    testZeroClamp(referenceZero);
    remaining--;
  }
}

View file

@ -0,0 +1,75 @@
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// VMOptions=--optimization-counter-threshold=10 --no-background-compilation
// @dart = 2.9
// Library tag to be able to run in html test framework.
library float64x2_clamp_test;
import 'dart:math';
import 'dart:typed_data';
import 'package:expect/expect.dart';
/// Clamps a zero vector with inverted limits (lower above upper) and checks
/// that the lower limit wins in both lanes.
void testClampLowerGreaterThanUpper() {
  final lower = Float64x2(1.0, 1.0);
  final upper = Float64x2(-1.0, -1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes failure
  // messages label the two values correctly.
  Expect.equals(1.0, clamped.x);
  Expect.equals(1.0, clamped.y);
}
/// Clamps a zero vector that already lies inside [-1, 1] and checks that both
/// lanes are left at zero.
void testClamp() {
  final lower = Float64x2(-1.0, -1.0);
  final upper = Float64x2(1.0, 1.0);
  final clamped = Float64x2.zero().clamp(lower, upper);
  // Expect.equals takes (expected, actual); keeping that order makes failure
  // messages label the two values correctly.
  Expect.equals(0.0, clamped.x);
  Expect.equals(0.0, clamped.y);
}
/// Clamps a vector whose lanes lie outside the limits (x below the lower
/// limit, y above the upper limit) and checks that each lane lands on the
/// limit it crossed.
void testNonZeroClamp() {
  // NOTE(review): pow(123456.789, 123.1) is far beyond the finite double
  // range, so lane x appears to exercise infinities -- confirm this is the
  // intended coverage.
  final double bound = pow(123456.789, 123.1) as double;
  final lower = Float64x2(-bound, -234567.89);
  final upper = Float64x2(bound, 234567.89);
  final value =
      Float64x2(-pow(123456789.123, 123.1) as double, 234567890.123);
  final clamped = value.clamp(lower, upper);
  // The expected x lane is derived from the same bound as the lower limit
  // instead of a separately spelled pow() with a different exponent.
  // Expect.equals takes (expected, actual).
  Expect.equals(-bound, clamped.x);
  Expect.equals(234567.89, clamped.y);
}
/// Returns the result of clamping a negated zero vector between itself (as
/// the lower limit) and a plain zero vector (as the upper limit).
Float64x2 negativeZeroClamp() {
  final minusZero = -Float64x2.zero();
  final plusZero = Float64x2.zero();
  return minusZero.clamp(minusZero, plusZero);
}
/// Returns the result of clamping a zero vector between (-1, -1) as the lower
/// limit and a negated zero vector as the upper limit.
Float64x2 zeroClamp() {
  final lowerLimit = -Float64x2(1.0, 1.0);
  final upperLimit = -Float64x2.zero();
  final value = Float64x2.zero();
  return value.clamp(lowerLimit, upperLimit);
}
/// Checks that negativeZeroClamp() still yields the same lanes as the
/// reference result [unopt].
void testNegativeZeroClamp(Float64x2 unopt) {
  final res = negativeZeroClamp();
  // compareTo is used rather than == because it distinguishes -0.0 from 0.0.
  // Expect.equals takes (expected, actual).
  Expect.equals(0, res.x.compareTo(unopt.x));
  Expect.equals(0, res.y.compareTo(unopt.y));
}
/// Checks that zeroClamp() still yields the same lanes as the reference
/// result [unopt].
void testZeroClamp(Float64x2 unopt) {
  final res = zeroClamp();
  // compareTo is used rather than == because it distinguishes -0.0 from 0.0.
  // Expect.equals takes (expected, actual).
  Expect.equals(0, res.x.compareTo(unopt.x));
  Expect.equals(0, res.y.compareTo(unopt.y));
}
/// Captures reference results for the signed-zero cases once, then runs every
/// clamp check 2000 times (presumably enough iterations for the optimizing
/// compiler to kick in under the VMOptions above -- confirm).
main() {
  final referenceNegZero = negativeZeroClamp();
  final referenceZero = zeroClamp();
  var remaining = 2000;
  while (remaining > 0) {
    testClampLowerGreaterThanUpper();
    testClamp();
    testNonZeroClamp();
    testNegativeZeroClamp(referenceNegZero);
    testZeroClamp(referenceZero);
    remaining--;
  }
}