[vm] Faster double.floorToDouble/ceilToDouble/truncateToDouble/roundToDouble in AOT mode

This change replaces graph intrinsics and call specializer code for
certain _Double methods with a body created in FlowGraphBuilder.

double.floorToDouble micro-benchmark on AOT/x64:
Before: BenchFloorToDouble(RunTime): 642.3240205523442 us.
After: BenchFloorToDouble(RunTime): 268.0320289466631 us.

double.floor micro-benchmark on AOT/x64:
Before: BenchFloor(RunTime): 760.7630277672118 us.
After: BenchFloor(RunTime): 537.2132688691916 us.

TEST=ci
Issue: https://github.com/dart-lang/sdk/issues/46650
Change-Id: I47f5d8a1bdc0f71965ad1763c7bc46540428c6cd
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/210652
Commit-Queue: Alexander Markov <alexmarkov@google.com>
Reviewed-by: Slava Egorov <vegorov@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
This commit is contained in:
Alexander Markov 2021-08-27 00:33:46 +00:00 committed by commit-bot@chromium.org
parent 25d0ae2304
commit f0d6b8864d
18 changed files with 112 additions and 175 deletions

View file

@ -31,9 +31,6 @@ class AsmIntrinsifier : public AllStatic {
public:
static intptr_t ParameterSlotFromSp();
static void IntrinsicCallPrologue(Assembler* assembler);
static void IntrinsicCallEpilogue(Assembler* assembler);
private:
friend class Intrinsifier;

View file

@ -28,24 +28,6 @@ intptr_t AsmIntrinsifier::ParameterSlotFromSp() {
return -1;
}
// Emits the "IntrinsicCallPrologue" sequence: copies LR into
// CALLEE_SAVED_TEMP so that IntrinsicCallEpilogue can move it back later.
void AsmIntrinsifier::IntrinsicCallPrologue(Assembler* assembler) {
// Compile-time checks that CODE_REG, ARGS_DESC_REG and CALLEE_SAVED_TEMP are
// all reported as ABI-preserved by IsAbiPreservedRegister.
COMPILE_ASSERT(IsAbiPreservedRegister(CODE_REG));
COMPILE_ASSERT(IsAbiPreservedRegister(ARGS_DESC_REG));
COMPILE_ASSERT(IsAbiPreservedRegister(CALLEE_SAVED_TEMP));
// Save LR by moving it to a callee saved temporary register.
__ Comment("IntrinsicCallPrologue");
// The mov is wrapped in the macro that marks a return-address spill from LR
// into a register.
SPILLS_RETURN_ADDRESS_FROM_LR_TO_REGISTER(
__ mov(CALLEE_SAVED_TEMP, Operand(LR)));
}
// Emits the "IntrinsicCallEpilogue" sequence: moves the value held in
// CALLEE_SAVED_TEMP back into LR (the inverse of IntrinsicCallPrologue).
void AsmIntrinsifier::IntrinsicCallEpilogue(Assembler* assembler) {
// Restore LR.
__ Comment("IntrinsicCallEpilogue");
// The mov is wrapped in the macro that marks a return-address restore from
// a register into LR.
RESTORES_RETURN_ADDRESS_FROM_REGISTER_TO_LR(
__ mov(LR, Operand(CALLEE_SAVED_TEMP)));
}
// Allocate a GrowableObjectArray using the backing array specified.
// On stack: type argument (+1), data (+0).
void AsmIntrinsifier::GrowableArray_Allocate(Assembler* assembler,

View file

@ -28,27 +28,6 @@ intptr_t AsmIntrinsifier::ParameterSlotFromSp() {
return -1;
}
// Emits the "IntrinsicCallPrologue" sequence: copies LR into
// CALLEE_SAVED_TEMP and ARGS_DESC_REG into CALLEE_SAVED_TEMP2 so that
// IntrinsicCallEpilogue can move both back later.
void AsmIntrinsifier::IntrinsicCallPrologue(Assembler* assembler) {
// Compile-time checks: CODE_REG and both temporaries are ABI-preserved,
// ARGS_DESC_REG is not (presumably why it is copied below), and neither
// temporary aliases CODE_REG or ARGS_DESC_REG.
COMPILE_ASSERT(IsAbiPreservedRegister(CODE_REG));
COMPILE_ASSERT(!IsAbiPreservedRegister(ARGS_DESC_REG));
COMPILE_ASSERT(IsAbiPreservedRegister(CALLEE_SAVED_TEMP));
COMPILE_ASSERT(IsAbiPreservedRegister(CALLEE_SAVED_TEMP2));
COMPILE_ASSERT(CALLEE_SAVED_TEMP != CODE_REG);
COMPILE_ASSERT(CALLEE_SAVED_TEMP != ARGS_DESC_REG);
COMPILE_ASSERT(CALLEE_SAVED_TEMP2 != CODE_REG);
COMPILE_ASSERT(CALLEE_SAVED_TEMP2 != ARGS_DESC_REG);
__ Comment("IntrinsicCallPrologue");
// Save LR; the mov is wrapped in the macro that marks a return-address spill.
SPILLS_RETURN_ADDRESS_FROM_LR_TO_REGISTER(__ mov(CALLEE_SAVED_TEMP, LR));
// Save ARGS_DESC_REG in the second callee-saved temporary.
__ mov(CALLEE_SAVED_TEMP2, ARGS_DESC_REG);
}
// Emits the "IntrinsicCallEpilogue" sequence: moves CALLEE_SAVED_TEMP back
// into LR and CALLEE_SAVED_TEMP2 back into ARGS_DESC_REG (the inverse of
// IntrinsicCallPrologue).
void AsmIntrinsifier::IntrinsicCallEpilogue(Assembler* assembler) {
__ Comment("IntrinsicCallEpilogue");
// Restore LR; the mov is wrapped in the macro that marks a return-address
// restore.
RESTORES_RETURN_ADDRESS_FROM_REGISTER_TO_LR(__ mov(LR, CALLEE_SAVED_TEMP));
// Restore ARGS_DESC_REG from the second callee-saved temporary.
__ mov(ARGS_DESC_REG, CALLEE_SAVED_TEMP2);
}
// Allocate a GrowableObjectArray using the backing array specified.
// On stack: type argument (+1), data (+0).
void AsmIntrinsifier::GrowableArray_Allocate(Assembler* assembler,

View file

@ -35,18 +35,6 @@ intptr_t AsmIntrinsifier::ParameterSlotFromSp() {
return 0;
}
// Emits the "IntrinsicCallPrologue" sequence: a single movl that copies
// ARGS_DESC_REG into CALLEE_SAVED_TEMP (assuming destination-first operand
// order — TODO confirm); IntrinsicCallEpilogue emits the reverse move.
void AsmIntrinsifier::IntrinsicCallPrologue(Assembler* assembler) {
// The temporary must not alias the register being saved.
COMPILE_ASSERT(CALLEE_SAVED_TEMP != ARGS_DESC_REG);
assembler->Comment("IntrinsicCallPrologue");
assembler->movl(CALLEE_SAVED_TEMP, ARGS_DESC_REG);
}
// Emits the "IntrinsicCallEpilogue" sequence: a single movl that copies
// CALLEE_SAVED_TEMP back into ARGS_DESC_REG (assuming destination-first
// operand order — TODO confirm), undoing IntrinsicCallPrologue.
void AsmIntrinsifier::IntrinsicCallEpilogue(Assembler* assembler) {
assembler->Comment("IntrinsicCallEpilogue");
assembler->movl(ARGS_DESC_REG, CALLEE_SAVED_TEMP);
}
// Allocate a GrowableObjectArray:: using the backing array specified.
// On stack: type argument (+2), data (+1), return-address (+0).
void AsmIntrinsifier::GrowableArray_Allocate(Assembler* assembler,

View file

@ -28,22 +28,6 @@ intptr_t AsmIntrinsifier::ParameterSlotFromSp() {
return 0;
}
// Emits the "IntrinsicCallPrologue" sequence: a single movq that copies
// ARGS_DESC_REG into CALLEE_SAVED_TEMP (assuming destination-first operand
// order — TODO confirm); IntrinsicCallEpilogue emits the reverse move.
void AsmIntrinsifier::IntrinsicCallPrologue(Assembler* assembler) {
// Compile-time checks: CODE_REG and CALLEE_SAVED_TEMP are ABI-preserved,
// ARGS_DESC_REG is not (presumably why it is copied below), and the
// temporary aliases neither CODE_REG nor ARGS_DESC_REG.
COMPILE_ASSERT(IsAbiPreservedRegister(CODE_REG));
COMPILE_ASSERT(!IsAbiPreservedRegister(ARGS_DESC_REG));
COMPILE_ASSERT(IsAbiPreservedRegister(CALLEE_SAVED_TEMP));
COMPILE_ASSERT(CALLEE_SAVED_TEMP != CODE_REG);
COMPILE_ASSERT(CALLEE_SAVED_TEMP != ARGS_DESC_REG);
assembler->Comment("IntrinsicCallPrologue");
assembler->movq(CALLEE_SAVED_TEMP, ARGS_DESC_REG);
}
// Emits the "IntrinsicCallEpilogue" sequence: a single movq that copies
// CALLEE_SAVED_TEMP back into ARGS_DESC_REG (assuming destination-first
// operand order — TODO confirm), undoing IntrinsicCallPrologue.
void AsmIntrinsifier::IntrinsicCallEpilogue(Assembler* assembler) {
assembler->Comment("IntrinsicCallEpilogue");
assembler->movq(ARGS_DESC_REG, CALLEE_SAVED_TEMP);
}
// Allocate a GrowableObjectArray using the backing array specified.
// On stack: type argument (+2), data (+1), return-address (+0).
void AsmIntrinsifier::GrowableArray_Allocate(Assembler* assembler,

View file

@ -6178,10 +6178,7 @@ intptr_t InvokeMathCFunctionInstr::ArgumentCountFor(
switch (kind) {
case MethodRecognizer::kDoubleTruncate:
case MethodRecognizer::kDoubleFloor:
case MethodRecognizer::kDoubleCeil: {
ASSERT(!TargetCPUFeatures::double_truncate_round_supported());
return 1;
}
case MethodRecognizer::kDoubleCeil:
case MethodRecognizer::kDoubleRound:
case MethodRecognizer::kMathAtan:
case MethodRecognizer::kMathTan:

View file

@ -8394,6 +8394,11 @@ class DoubleToDoubleInstr : public TemplateDefinition<1, NoThrow, Pure> {
return kUnboxedDouble;
}
// Reports the speculative mode of the input at [idx]. The only valid index
// is 0, and that input is always kNotSpeculative.
virtual SpeculativeMode SpeculativeModeOfInput(intptr_t idx) const {
ASSERT(idx == 0);
return kNotSpeculative;
}
// The deoptimization target is this instruction's own deopt id.
virtual intptr_t DeoptimizationTarget() const { return GetDeoptId(); }
virtual bool AttributesEqual(const Instruction& other) const {
@ -8512,6 +8517,11 @@ class InvokeMathCFunctionInstr : public PureDefinition {
return kUnboxedDouble;
}
// Reports the speculative mode of the input at [idx]. [idx] must lie in
// [0, InputCount()); every input is kNotSpeculative.
virtual SpeculativeMode SpeculativeModeOfInput(intptr_t idx) const {
ASSERT((0 <= idx) && (idx < InputCount()));
return kNotSpeculative;
}
// The deoptimization target is this instruction's own deopt id.
virtual intptr_t DeoptimizationTarget() const { return GetDeoptId(); }
// The number of inputs is the length of the inputs_ array.
virtual intptr_t InputCount() const { return inputs_->length(); }

View file

@ -5344,30 +5344,36 @@ LocationSummary* InvokeMathCFunctionInstr::MakeLocationSummary(Zone* zone,
bool opt) const {
// Calling convention on x64 uses XMM0 and XMM1 to pass the first two
// double arguments and XMM0 to return the result.
//
// TODO(sjindel): allow XMM0 to be used. Requires refactoring InvokeDoublePow
// to allow input 1/output register to be equal.
ASSERT((InputCount() == 1) || (InputCount() == 2));
const intptr_t kNumTemps =
(recognized_kind() == MethodRecognizer::kMathDoublePow) ? 4 : 1;
LocationSummary* result = new (zone)
LocationSummary(zone, InputCount(), kNumTemps, LocationSummary::kCall);
ASSERT(R13 != CALLEE_SAVED_TEMP);
ASSERT(((1 << R13) & CallingConventions::kCalleeSaveCpuRegisters) != 0);
result->set_temp(0, Location::RegisterLocation(R13));
result->set_in(0, Location::FpuRegisterLocation(XMM2));
if (InputCount() == 2) {
result->set_in(1, Location::FpuRegisterLocation(XMM1));
}
if (recognized_kind() == MethodRecognizer::kMathDoublePow) {
ASSERT(InputCount() == 2);
const intptr_t kNumTemps = 4;
LocationSummary* result = new (zone)
LocationSummary(zone, InputCount(), kNumTemps, LocationSummary::kCall);
result->set_in(0, Location::FpuRegisterLocation(XMM2));
result->set_in(1, Location::FpuRegisterLocation(XMM1));
result->set_temp(0, Location::RegisterLocation(R13));
// Temp index 1.
result->set_temp(1, Location::RegisterLocation(RAX));
// Temp index 2.
result->set_temp(2, Location::FpuRegisterLocation(XMM4));
// Block XMM0 for the calling convention.
result->set_temp(3, Location::FpuRegisterLocation(XMM0));
result->set_out(0, Location::FpuRegisterLocation(XMM3));
return result;
}
result->set_out(0, Location::FpuRegisterLocation(XMM3));
ASSERT((InputCount() == 1) || (InputCount() == 2));
const intptr_t kNumTemps = 1;
LocationSummary* result = new (zone)
LocationSummary(zone, InputCount(), kNumTemps, LocationSummary::kCall);
result->set_temp(0, Location::RegisterLocation(R13));
result->set_in(0, Location::FpuRegisterLocation(XMM0));
if (InputCount() == 2) {
result->set_in(1, Location::FpuRegisterLocation(XMM1));
}
result->set_out(0, Location::FpuRegisterLocation(XMM0));
return result;
}
@ -5509,17 +5515,20 @@ void InvokeMathCFunctionInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
InvokeDoublePow(compiler, this);
return;
}
// Save RSP.
__ movq(locs()->temp(kSavedSpTempIndex).reg(), RSP);
__ ReserveAlignedFrameSpace(0);
__ movaps(XMM0, locs()->in(0).fpu_reg());
ASSERT(locs()->in(0).fpu_reg() == XMM0);
if (InputCount() == 2) {
ASSERT(locs()->in(1).fpu_reg() == XMM1);
}
// Save RSP.
__ movq(locs()->temp(kSavedSpTempIndex).reg(), RSP);
__ ReserveAlignedFrameSpace(0);
ASSERT(TargetFunction().is_leaf()); // No deopt info needed.
__ CallRuntime(TargetFunction(), InputCount());
__ movaps(locs()->out(0).fpu_reg(), XMM0);
ASSERT(locs()->out(0).fpu_reg() == XMM0);
// Restore RSP.
__ movq(RSP, locs()->temp(kSavedSpTempIndex).reg());
}

View file

@ -968,21 +968,6 @@ bool CallSpecializer::TryInlineInstanceGetter(InstanceCallInstr* call) {
return TryInlineImplicitInstanceGetter(call);
}
// Replaces [call] with an InvokeMathCFunctionInstr for [recognized_kind].
//
// The call must have no type arguments. A receiver check is added first,
// then every argument of the call is wrapped in a fresh Value and handed to
// the new instruction together with the call's deopt id and source.
void CallSpecializer::ReplaceWithMathCFunction(
    InstanceCallInstr* call,
    MethodRecognizer::Kind recognized_kind) {
  ASSERT(call->type_args_len() == 0);
  AddReceiverCheck(call);

  const intptr_t argument_count = call->ArgumentCount();
  auto* inputs = new (Z) ZoneGrowableArray<Value*>(argument_count);
  for (intptr_t index = 0; index < argument_count; ++index) {
    inputs->Add(new (Z) Value(call->ArgumentAt(index)));
  }

  ReplaceCall(call, new (Z) InvokeMathCFunctionInstr(
                        inputs, call->deopt_id(), recognized_kind,
                        call->source()));
}
// Inline only simple, frequently called core library methods.
bool CallSpecializer::TryInlineInstanceMethod(InstanceCallInstr* call) {
const CallTargets& targets = call->Targets();
@ -1034,23 +1019,6 @@ bool CallSpecializer::TryInlineInstanceMethod(InstanceCallInstr* call) {
ReplaceCall(call, d2i_instr);
return true;
}
case MethodRecognizer::kDoubleMod:
case MethodRecognizer::kDoubleRound:
ReplaceWithMathCFunction(call, recognized_kind);
return true;
case MethodRecognizer::kDoubleTruncate:
case MethodRecognizer::kDoubleFloor:
case MethodRecognizer::kDoubleCeil:
if (!TargetCPUFeatures::double_truncate_round_supported()) {
ReplaceWithMathCFunction(call, recognized_kind);
} else {
AddReceiverCheck(call);
DoubleToDoubleInstr* d2d_instr =
new (Z) DoubleToDoubleInstr(new (Z) Value(call->ArgumentAt(0)),
recognized_kind, call->deopt_id());
ReplaceCall(call, d2d_instr);
}
return true;
default:
break;
}

View file

@ -172,9 +172,6 @@ class CallSpecializer : public FlowGraphVisitor {
bool TryOptimizeInstanceOfUsingStaticTypes(InstanceCallInstr* call,
const AbstractType& type);
void ReplaceWithMathCFunction(InstanceCallInstr* call,
MethodRecognizer::Kind recognized_kind);
bool TryStringLengthOneEquality(InstanceCallInstr* call, Token::Kind op_kind);
void SpecializePolymorphicInstanceCall(PolymorphicInstanceCallInstr* call);

View file

@ -1193,5 +1193,25 @@ Fragment BaseFlowGraphBuilder::InitConstantParameters() {
return instructions;
}
// Builds an InvokeMathCFunctionInstr for [recognized_kind] whose inputs are
// the [num_inputs] values obtained from GetArguments. The instruction gets a
// fresh deopt id and no source position, is pushed, and is returned as a
// single-instruction fragment.
Fragment BaseFlowGraphBuilder::InvokeMathCFunction(
    MethodRecognizer::Kind recognized_kind,
    intptr_t num_inputs) {
  InputsArray* inputs = GetArguments(num_inputs);
  const intptr_t deopt_id = GetNextDeoptId();
  InvokeMathCFunctionInstr* invoke = new (Z) InvokeMathCFunctionInstr(
      inputs, deopt_id, recognized_kind,
      InstructionSource(TokenPosition::kNoSource));
  Push(invoke);
  return Fragment(invoke);
}
// Pops one value and builds a DoubleToDoubleInstr over it for
// [recognized_kind], using a fresh deopt id. The instruction is pushed and
// returned as a single-instruction fragment.
Fragment BaseFlowGraphBuilder::DoubleToDouble(
    MethodRecognizer::Kind recognized_kind) {
  Value* operand = Pop();
  const intptr_t deopt_id = GetNextDeoptId();
  DoubleToDoubleInstr* conversion =
      new (Z) DoubleToDoubleInstr(operand, recognized_kind, deopt_id);
  Push(conversion);
  return Fragment(conversion);
}
} // namespace kernel
} // namespace dart

View file

@ -436,6 +436,14 @@ class BaseFlowGraphBuilder {
// Sets raw parameter variables to inferred constant values.
Fragment InitConstantParameters();
// Builds an InvokeMathCFunctionInstr for the recognized method, taking
// [num_inputs] inputs (presumably popped from the expression stack — see
// GetArguments), and pushes the resulting instruction.
Fragment InvokeMathCFunction(MethodRecognizer::Kind recognized_kind,
intptr_t num_inputs);
// Pops double value and converts it to double as specified
// by the recognized method (kDoubleTruncate,
// kDoubleFloor or kDoubleCeil).
Fragment DoubleToDouble(MethodRecognizer::Kind recognized_kind);
// Returns whether this function has a saved arguments descriptor array.
bool has_saved_args_desc_array() {
return function_.HasSavedArgumentsDescriptor();

View file

@ -988,7 +988,7 @@ FlowGraph* StreamingFlowGraphBuilder::BuildGraph() {
case UntaggedFunction::kSetterFunction:
case UntaggedFunction::kClosureFunction:
case UntaggedFunction::kConstructor: {
if (B->IsRecognizedMethodForFlowGraph(function)) {
if (FlowGraphBuilder::IsRecognizedMethodForFlowGraph(function)) {
return B->BuildGraphOfRecognizedMethod(function);
}
return BuildGraphOfFunction(function.IsGenerativeConstructor());

View file

@ -8,6 +8,7 @@
#include "platform/globals.h"
#include "vm/class_id.h"
#include "vm/compiler/aot/precompiler.h"
#include "vm/compiler/backend/flow_graph_compiler.h"
#include "vm/compiler/backend/il.h"
#include "vm/compiler/backend/il_printer.h"
#include "vm/compiler/backend/locations.h"
@ -891,6 +892,12 @@ bool FlowGraphBuilder::IsRecognizedMethodForFlowGraph(
case MethodRecognizer::kUtf8DecoderScan:
case MethodRecognizer::kHas63BitSmis:
return true;
case MethodRecognizer::kDoubleMod:
case MethodRecognizer::kDoubleRound:
case MethodRecognizer::kDoubleTruncate:
case MethodRecognizer::kDoubleFloor:
case MethodRecognizer::kDoubleCeil:
return FlowGraphCompiler::SupportsUnboxedDoubles();
default:
return false;
}
@ -1506,6 +1513,24 @@ FlowGraph* FlowGraphBuilder::BuildGraphOfRecognizedMethod(
body += LoadIndexed(kIntPtrCid);
body += Box(kUnboxedIntPtr);
} break;
case MethodRecognizer::kDoubleMod:
case MethodRecognizer::kDoubleRound:
case MethodRecognizer::kDoubleTruncate:
case MethodRecognizer::kDoubleFloor:
case MethodRecognizer::kDoubleCeil: {
for (intptr_t i = 0, n = function.NumParameters(); i < n; ++i) {
body += LoadLocal(parsed_function_->RawParameterVariable(i));
}
if (!CompilerState::Current().is_aot() &&
TargetCPUFeatures::double_truncate_round_supported() &&
((kind == MethodRecognizer::kDoubleTruncate) ||
(kind == MethodRecognizer::kDoubleFloor) ||
(kind == MethodRecognizer::kDoubleCeil))) {
body += DoubleToDouble(kind);
} else {
body += InvokeMathCFunction(kind, function.NumParameters());
}
} break;
default: {
UNREACHABLE();
break;

View file

@ -65,6 +65,10 @@ class FlowGraphBuilder : public BaseFlowGraphBuilder {
FlowGraph* BuildGraph();
// Returns true if given [function] is recognized for flow
// graph building and its body is expressed in a custom-built IL.
static bool IsRecognizedMethodForFlowGraph(const Function& function);
private:
BlockEntryInstr* BuildPrologue(BlockEntryInstr* normal_entry,
PrologueInfo* prologue_info);
@ -145,8 +149,6 @@ class FlowGraphBuilder : public BaseFlowGraphBuilder {
Fragment NativeFunctionBody(const Function& function,
LocalVariable* first_parameter);
// Every recognized method has a body expressed in IL.
bool IsRecognizedMethodForFlowGraph(const Function& function);
FlowGraph* BuildGraphOfRecognizedMethod(const Function& function);
Fragment BuildTypedDataViewFactoryConstructor(const Function& function,

View file

@ -1121,40 +1121,6 @@ bool GraphIntrinsifier::Build_MathLog(FlowGraph* flow_graph) {
return BuildInvokeMathCFunction(flow_graph, MethodRecognizer::kMathLog);
}
// Builds the DoubleMod graph intrinsic by delegating to
// BuildInvokeMathCFunction with kDoubleMod and two parameters; returns that
// call's result.
bool GraphIntrinsifier::Build_DoubleMod(FlowGraph* flow_graph) {
return BuildInvokeMathCFunction(flow_graph, MethodRecognizer::kDoubleMod,
/* num_parameters = */ 2);
}
// Builds the DoubleCeil graph intrinsic as a math C function invocation.
// Returns false without building anything when the target CPU reports
// double truncate/round support; otherwise returns the result of
// BuildInvokeMathCFunction.
bool GraphIntrinsifier::Build_DoubleCeil(FlowGraph* flow_graph) {
  // TODO(johnmccutchan): On X86 this intrinsic can be written in a different
  // way.
  const bool has_cpu_rounding =
      TargetCPUFeatures::double_truncate_round_supported();
  return !has_cpu_rounding &&
         BuildInvokeMathCFunction(flow_graph, MethodRecognizer::kDoubleCeil);
}
// Builds the DoubleFloor graph intrinsic as a math C function invocation.
// Returns false without building anything when the target CPU reports
// double truncate/round support; otherwise returns the result of
// BuildInvokeMathCFunction.
bool GraphIntrinsifier::Build_DoubleFloor(FlowGraph* flow_graph) {
  // TODO(johnmccutchan): On X86 this intrinsic can be written in a different
  // way.
  const bool has_cpu_rounding =
      TargetCPUFeatures::double_truncate_round_supported();
  return !has_cpu_rounding &&
         BuildInvokeMathCFunction(flow_graph, MethodRecognizer::kDoubleFloor);
}
// Builds the DoubleTruncate graph intrinsic as a math C function invocation.
// Returns false without building anything when the target CPU reports
// double truncate/round support; otherwise returns the result of
// BuildInvokeMathCFunction.
bool GraphIntrinsifier::Build_DoubleTruncate(FlowGraph* flow_graph) {
  // TODO(johnmccutchan): On X86 this intrinsic can be written in a different
  // way.
  const bool has_cpu_rounding =
      TargetCPUFeatures::double_truncate_round_supported();
  return !has_cpu_rounding &&
         BuildInvokeMathCFunction(flow_graph,
                                  MethodRecognizer::kDoubleTruncate);
}
// Builds the DoubleRound graph intrinsic by delegating to
// BuildInvokeMathCFunction with kDoubleRound; returns that call's result.
// Unlike the ceil/floor/truncate builders there is no CPU-feature check here.
bool GraphIntrinsifier::Build_DoubleRound(FlowGraph* flow_graph) {
return BuildInvokeMathCFunction(flow_graph, MethodRecognizer::kDoubleRound);
}
bool GraphIntrinsifier::Build_ImplicitGetter(FlowGraph* flow_graph) {
// This code will only be invoked if our assumptions have been met (see
// [Intrinsifier::CanIntrinsifyFieldAccessor])

View file

@ -87,6 +87,11 @@ namespace dart {
V(_Double, _sub, DoubleSub, 0x28474c2e) \
V(_Double, _mul, DoubleMul, 0x1f98c76c) \
V(_Double, _div, DoubleDiv, 0x287d3791) \
V(_Double, _modulo, DoubleMod, 0xfdb397ef) \
V(_Double, ceilToDouble, DoubleCeil, 0x5f1bced9) \
V(_Double, floorToDouble, DoubleFloor, 0x54b4cb48) \
V(_Double, roundToDouble, DoubleRound, 0x5649ca00) \
V(_Double, truncateToDouble, DoubleTruncate, 0x62d48659) \
V(::, min, MathMin, 0x504a28df) \
V(::, max, MathMax, 0xead7161a) \
V(::, _doublePow, MathDoublePow, 0x989f3334) \
@ -366,11 +371,6 @@ namespace dart {
V(_IntegerImplementation, >>, Integer_sar, 0x4a3615a7) \
V(_IntegerImplementation, >>>, Integer_shr, 0x2bac5209) \
V(_Double, unary-, DoubleFlipSignBit, 0x3d39082b) \
V(_Double, truncateToDouble, DoubleTruncate, 0x62d48298) \
V(_Double, roundToDouble, DoubleRound, 0x5649c63f) \
V(_Double, floorToDouble, DoubleFloor, 0x54b4c787) \
V(_Double, ceilToDouble, DoubleCeil, 0x5f1bcb18) \
V(_Double, _modulo, DoubleMod, 0xfdb3942e)
#define GRAPH_INTRINSICS_LIST(V) \
GRAPH_CORE_INTRINSICS_LIST(V) \

View file

@ -67,7 +67,8 @@ class _Double implements double {
return _modulo(other.toDouble());
}
@pragma("vm:recognized", "graph-intrinsic")
@pragma("vm:recognized", "other")
@pragma("vm:prefer-inline")
@pragma("vm:exact-result-type", _Double)
double _modulo(double other) native "Double_modulo";
@ -181,16 +182,20 @@ class _Double implements double {
int ceil() => ceilToDouble().toInt();
int truncate() => truncateToDouble().toInt();
@pragma("vm:recognized", "graph-intrinsic")
@pragma("vm:recognized", "other")
@pragma("vm:prefer-inline")
@pragma("vm:exact-result-type", _Double)
double roundToDouble() native "Double_round";
@pragma("vm:recognized", "graph-intrinsic")
@pragma("vm:recognized", "other")
@pragma("vm:prefer-inline")
@pragma("vm:exact-result-type", _Double)
double floorToDouble() native "Double_floor";
@pragma("vm:recognized", "graph-intrinsic")
@pragma("vm:recognized", "other")
@pragma("vm:prefer-inline")
@pragma("vm:exact-result-type", _Double)
double ceilToDouble() native "Double_ceil";
@pragma("vm:recognized", "graph-intrinsic")
@pragma("vm:recognized", "other")
@pragma("vm:prefer-inline")
@pragma("vm:exact-result-type", _Double)
double truncateToDouble() native "Double_truncate";