mirror of
https://github.com/dart-lang/sdk
synced 2024-09-16 01:59:38 +00:00
[VM] Pick more compact instruction encodings on x64
Several tricks here:
* When zeroing registers we can use xorl instead of xorq because the 'l' variant will zero the top bits.
* 'test' and 'and' instructions with immediate arguments can use the 8-bit and 32-bit variants more heavily.
* mov reg, immediate can use more compact encodings when sign-extension is not needed.

Performance improves by more than 1% when measured on Dart2JS.

R=vegorov@google.com

Intel optimization manual says: "Assembly/Compiler Coding Rule 64. (H impact, M generality) Use the 32-bit versions of instructions in 64-bit mode to reduce code size unless the 64-bit version is necessary to access 64-bit data or additional registers."

Bug:
Change-Id: I2a989315c45f8d8ebab719653fbfa2b18ebb77c9
Reviewed-on: https://dart-review.googlesource.com/23400
Commit-Queue: Erik Corry <erikcorry@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
This commit is contained in:
parent
062e5d6db3
commit
1d9ff70a08
|
@ -328,16 +328,25 @@ void Assembler::movw(const Address& dst, const Immediate& imm) {
|
|||
|
||||
void Assembler::movq(Register dst, const Immediate& imm) {
|
||||
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
|
||||
if (imm.is_int32()) {
|
||||
if (imm.is_uint32()) {
|
||||
// Pick single byte B8 encoding if possible. If dst < 8 then we also omit
|
||||
// the Rex byte.
|
||||
EmitRegisterREX(dst, REX_NONE);
|
||||
EmitUint8(0xB8 | (dst & 7));
|
||||
EmitUInt32(imm.value());
|
||||
} else if (imm.is_int32()) {
|
||||
// Sign extended C7 Cx encoding if we have a negative input.
|
||||
Operand operand(dst);
|
||||
EmitOperandREX(0, operand, REX_W);
|
||||
EmitUint8(0xC7);
|
||||
EmitOperand(0, operand);
|
||||
EmitImmediate(imm);
|
||||
} else {
|
||||
// Full 64 bit immediate encoding.
|
||||
EmitRegisterREX(dst, REX_W);
|
||||
EmitUint8(0xB8 | (dst & 7));
|
||||
EmitImmediate(imm);
|
||||
}
|
||||
EmitImmediate(imm);
|
||||
}
|
||||
|
||||
// Use 0x89 encoding (instead of 0x8B encoding), which is expected by gdb64
|
||||
|
@ -1297,37 +1306,6 @@ void Assembler::testl(Register reg1, Register reg2) {
|
|||
EmitOperand(reg1 & 7, operand);
|
||||
}
|
||||
|
||||
void Assembler::testl(Register reg, const Immediate& imm) {
|
||||
// TODO(kasperl): Deal with registers r8-r15 using the short
|
||||
// encoding form of the immediate?
|
||||
|
||||
// We are using RBP for the exception marker. See testl(Label*).
|
||||
ASSERT(reg != RBP);
|
||||
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
|
||||
// For registers that have a byte variant (RAX, RBX, RCX, and RDX)
|
||||
// we only test the byte register to keep the encoding short.
|
||||
if (imm.is_uint8() && reg < 4) {
|
||||
// Use zero-extended 8-bit immediate.
|
||||
if (reg == RAX) {
|
||||
EmitUint8(0xA8);
|
||||
} else {
|
||||
EmitUint8(0xF6);
|
||||
EmitUint8(0xC0 + reg);
|
||||
}
|
||||
EmitUint8(imm.value() & 0xFF);
|
||||
} else {
|
||||
ASSERT(imm.is_int32());
|
||||
if (reg == RAX) {
|
||||
EmitUint8(0xA9);
|
||||
} else {
|
||||
EmitRegisterREX(reg, REX_NONE);
|
||||
EmitUint8(0xF7);
|
||||
EmitUint8(0xC0 | (reg & 7));
|
||||
}
|
||||
EmitImmediate(imm);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::testb(const Address& address, const Immediate& imm) {
|
||||
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
|
||||
EmitOperandREX(0, address, REX_NONE);
|
||||
|
@ -1346,27 +1324,37 @@ void Assembler::testq(Register reg1, Register reg2) {
|
|||
}
|
||||
|
||||
void Assembler::testq(Register reg, const Immediate& imm) {
|
||||
// TODO(kasperl): Deal with registers r8-r15 using the short
|
||||
// encoding form of the immediate?
|
||||
|
||||
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
|
||||
// For registers that have a byte variant (RAX, RBX, RCX, and RDX)
|
||||
// we only test the byte register to keep the encoding short.
|
||||
if (imm.is_uint8() && reg < 4) {
|
||||
if (imm.is_uint8()) {
|
||||
// Use zero-extended 8-bit immediate.
|
||||
if (reg >= 4) {
|
||||
// We need the Rex byte to give access to the SIL and DIL registers (the
|
||||
// low bytes of RSI and RDI).
|
||||
EmitRegisterREX(reg, REX_NONE, /* force = */ true);
|
||||
}
|
||||
if (reg == RAX) {
|
||||
EmitUint8(0xA8);
|
||||
} else {
|
||||
EmitUint8(0xF6);
|
||||
EmitUint8(0xC0 + reg);
|
||||
EmitUint8(0xC0 + (reg & 7));
|
||||
}
|
||||
EmitUint8(imm.value() & 0xFF);
|
||||
} else {
|
||||
ASSERT(imm.is_int32());
|
||||
} else if (imm.is_uint32()) {
|
||||
if (reg == RAX) {
|
||||
EmitUint8(0xA9 | REX_W);
|
||||
EmitUint8(0xA9);
|
||||
} else {
|
||||
EmitRegisterREX(reg, REX_NONE);
|
||||
EmitUint8(0xF7);
|
||||
EmitUint8(0xC0 | (reg & 7));
|
||||
}
|
||||
EmitUInt32(imm.value());
|
||||
} else {
|
||||
// Sign extended version of 32 bit test.
|
||||
ASSERT(imm.is_int32());
|
||||
EmitRegisterREX(reg, REX_W);
|
||||
if (reg == RAX) {
|
||||
EmitUint8(0xA9);
|
||||
} else {
|
||||
EmitRegisterREX(reg, REX_W);
|
||||
EmitUint8(0xF7);
|
||||
EmitUint8(0xC0 | (reg & 7));
|
||||
}
|
||||
|
@ -1375,7 +1363,7 @@ void Assembler::testq(Register reg, const Immediate& imm) {
|
|||
}
|
||||
|
||||
void Assembler::TestImmediate(Register dst, const Immediate& imm) {
|
||||
if (imm.is_int32()) {
|
||||
if (imm.is_int32() || imm.is_uint32()) {
|
||||
testq(dst, imm);
|
||||
} else {
|
||||
ASSERT(dst != TMP);
|
||||
|
@ -1444,7 +1432,27 @@ void Assembler::AluQ(uint8_t modrm_opcode,
|
|||
uint8_t opcode,
|
||||
Register dst,
|
||||
const Immediate& imm) {
|
||||
if (imm.is_int32()) {
|
||||
if (modrm_opcode == 4 && imm.is_uint32()) {
|
||||
// We can use andl for andq.
|
||||
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
|
||||
EmitRegisterREX(dst, REX_NONE);
|
||||
// Would like to use EmitComplex here, but it doesn't like uint32
|
||||
// immediates.
|
||||
if (imm.is_int8()) {
|
||||
// Use sign-extended 8-bit immediate.
|
||||
EmitUint8(0x83);
|
||||
EmitOperand(modrm_opcode, Operand(dst));
|
||||
EmitUint8(imm.value() & 0xFF);
|
||||
} else {
|
||||
if (dst == RAX) {
|
||||
EmitUint8(0x25);
|
||||
} else {
|
||||
EmitUint8(0x81);
|
||||
EmitOperand(modrm_opcode, Operand(dst));
|
||||
}
|
||||
EmitUInt32(imm.value());
|
||||
}
|
||||
} else if (imm.is_int32()) {
|
||||
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
|
||||
EmitRegisterREX(dst, REX_W);
|
||||
EmitComplex(modrm_opcode, Operand(dst), imm);
|
||||
|
@ -1470,7 +1478,7 @@ void Assembler::AluQ(uint8_t modrm_opcode,
|
|||
}
|
||||
|
||||
void Assembler::AndImmediate(Register dst, const Immediate& imm) {
|
||||
if (imm.is_int32()) {
|
||||
if (imm.is_int32() || imm.is_uint32()) {
|
||||
andq(dst, imm);
|
||||
} else {
|
||||
ASSERT(dst != TMP);
|
||||
|
@ -2260,7 +2268,9 @@ intptr_t Assembler::FindImmediate(int64_t imm) {
|
|||
}
|
||||
|
||||
void Assembler::LoadImmediate(Register reg, const Immediate& imm) {
|
||||
if (imm.is_int32() || !constant_pool_allowed()) {
|
||||
if (imm.value() == 0) {
|
||||
xorl(reg, reg);
|
||||
} else if (imm.is_int32() || !constant_pool_allowed()) {
|
||||
movq(reg, imm);
|
||||
} else {
|
||||
int32_t offset = ObjectPool::element_offset(FindImmediate(imm.value()));
|
||||
|
|
|
@ -33,6 +33,7 @@ class Immediate : public ValueObject {
|
|||
bool is_uint8() const { return Utils::IsUint(8, value_); }
|
||||
bool is_uint16() const { return Utils::IsUint(16, value_); }
|
||||
bool is_int32() const { return Utils::IsInt(32, value_); }
|
||||
bool is_uint32() const { return Utils::IsUint(32, value_); }
|
||||
|
||||
private:
|
||||
const int64_t value_;
|
||||
|
@ -511,7 +512,7 @@ class Assembler : public ValueObject {
|
|||
void CompareImmediate(const Address& address, const Immediate& imm);
|
||||
|
||||
void testl(Register reg1, Register reg2);
|
||||
void testl(Register reg, const Immediate& imm);
|
||||
void testl(Register reg, const Immediate& imm) { testq(reg, imm); }
|
||||
void testb(const Address& address, const Immediate& imm);
|
||||
|
||||
void testq(Register reg1, Register reg2);
|
||||
|
@ -712,7 +713,9 @@ class Assembler : public ValueObject {
|
|||
bool constant_pool_allowed() const { return constant_pool_allowed_; }
|
||||
void set_constant_pool_allowed(bool b) { constant_pool_allowed_ = b; }
|
||||
|
||||
// Unlike movq this can affect the flags or use the constant pool.
|
||||
void LoadImmediate(Register reg, const Immediate& imm);
|
||||
|
||||
void LoadIsolate(Register dst);
|
||||
void LoadObject(Register dst, const Object& obj);
|
||||
void LoadUniqueObject(Register dst, const Object& obj);
|
||||
|
@ -1023,9 +1026,12 @@ class Assembler : public ValueObject {
|
|||
|
||||
inline void EmitUint8(uint8_t value);
|
||||
inline void EmitInt32(int32_t value);
|
||||
inline void EmitUInt32(uint32_t value);
|
||||
inline void EmitInt64(int64_t value);
|
||||
|
||||
inline void EmitRegisterREX(Register reg, uint8_t rex);
|
||||
inline void EmitRegisterREX(Register reg,
|
||||
uint8_t rex,
|
||||
bool force_emit = false);
|
||||
inline void EmitOperandREX(int rm, const Operand& operand, uint8_t rex);
|
||||
inline void EmitXmmRegisterOperand(int rm, XmmRegister reg);
|
||||
inline void EmitFixup(AssemblerFixup* fixup);
|
||||
|
@ -1077,14 +1083,18 @@ inline void Assembler::EmitInt32(int32_t value) {
|
|||
buffer_.Emit<int32_t>(value);
|
||||
}
|
||||
|
||||
inline void Assembler::EmitUInt32(uint32_t value) {
|
||||
buffer_.Emit<uint32_t>(value);
|
||||
}
|
||||
|
||||
inline void Assembler::EmitInt64(int64_t value) {
|
||||
buffer_.Emit<int64_t>(value);
|
||||
}
|
||||
|
||||
inline void Assembler::EmitRegisterREX(Register reg, uint8_t rex) {
|
||||
inline void Assembler::EmitRegisterREX(Register reg, uint8_t rex, bool force) {
|
||||
ASSERT(reg != kNoRegister);
|
||||
rex |= (reg > 7 ? REX_B : REX_NONE);
|
||||
if (rex != REX_NONE) EmitUint8(REX_PREFIX | rex);
|
||||
if (rex != REX_NONE || force) EmitUint8(REX_PREFIX | rex);
|
||||
}
|
||||
|
||||
inline void Assembler::EmitOperandREX(int rm,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1156,6 +1156,8 @@ bool DisassemblerX64::DecodeInstructionType(uint8_t** data) {
|
|||
// TODO(srdjan): Should we enable printing of REX.W?
|
||||
// if (rex_w()) Print("REX.W ");
|
||||
Print("%s%s", idesc.mnem, operand_size_code());
|
||||
} else if (current == 0xC3 || current == 0xCC) {
|
||||
Print("%s", idesc.mnem); // ret and int3 don't need a size specifier.
|
||||
} else {
|
||||
Print("%s%s", idesc.mnem, operand_size_code());
|
||||
}
|
||||
|
@ -1215,7 +1217,7 @@ bool DisassemblerX64::DecodeInstructionType(uint8_t** data) {
|
|||
}
|
||||
|
||||
case SHORT_IMMEDIATE_INSTR: {
|
||||
Print("%s %s, ", idesc.mnem, Rax());
|
||||
Print("%s%s %s,", idesc.mnem, operand_size_code(), Rax());
|
||||
PrintImmediate(*data + 1, DOUBLEWORD_SIZE);
|
||||
(*data) += 5;
|
||||
break;
|
||||
|
@ -1901,27 +1903,10 @@ int DisassemblerX64::InstructionDecode(uword pc) {
|
|||
break;
|
||||
|
||||
case 0xA9: {
|
||||
int64_t value = 0;
|
||||
bool check_for_stop = false;
|
||||
switch (operand_size()) {
|
||||
case WORD_SIZE:
|
||||
value = *reinterpret_cast<uint16_t*>(data + 1);
|
||||
data += 3;
|
||||
break;
|
||||
case DOUBLEWORD_SIZE:
|
||||
value = *reinterpret_cast<uint32_t*>(data + 1);
|
||||
data += 5;
|
||||
check_for_stop = true;
|
||||
break;
|
||||
case QUADWORD_SIZE:
|
||||
value = *reinterpret_cast<int32_t*>(data + 1);
|
||||
data += 5;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
data++;
|
||||
bool check_for_stop = operand_size() == DOUBLEWORD_SIZE;
|
||||
Print("test%s %s,", operand_size_code(), Rax());
|
||||
PrintImmediateValue(value);
|
||||
data += PrintImmediate(data, operand_size());
|
||||
if (check_for_stop) {
|
||||
CheckPrintStop(data);
|
||||
}
|
||||
|
|
|
@ -1342,7 +1342,7 @@ void FlowGraphCompiler::EmitOptimizedStaticCall(
|
|||
(isolate()->reify_generic_functions() && function.IsGeneric())) {
|
||||
__ LoadObject(R10, arguments_descriptor);
|
||||
} else {
|
||||
__ xorq(R10, R10); // GC safe smi zero because of stub.
|
||||
__ xorl(R10, R10); // GC safe smi zero because of stub.
|
||||
}
|
||||
// Do not use the code from the function, but let the code be patched so that
|
||||
// we can record the outgoing edges to other code.
|
||||
|
@ -1532,7 +1532,7 @@ void ParallelMoveResolver::EmitMove(int index) {
|
|||
const Object& constant = source.constant();
|
||||
if (destination.IsRegister()) {
|
||||
if (constant.IsSmi() && (Smi::Cast(constant).Value() == 0)) {
|
||||
__ xorq(destination.reg(), destination.reg());
|
||||
__ xorl(destination.reg(), destination.reg());
|
||||
} else if (constant.IsSmi() &&
|
||||
(source.constant_instruction()->representation() ==
|
||||
kUnboxedInt32)) {
|
||||
|
|
|
@ -199,7 +199,7 @@ void Intrinsifier::GrowableArray_add(Assembler* assembler) {
|
|||
__ jmp(&done, Assembler::kNearJump); \
|
||||
\
|
||||
__ Bind(&size_tag_overflow); \
|
||||
__ movq(RDI, Immediate(0)); \
|
||||
__ LoadImmediate(RDI, Immediate(0)); \
|
||||
__ Bind(&done); \
|
||||
\
|
||||
/* Get the class index and insert it into the tags. */ \
|
||||
|
@ -1768,7 +1768,7 @@ void GenerateSubstringMatchesSpecialization(Assembler* assembler,
|
|||
|
||||
__ SmiUntag(RBX); // start
|
||||
__ SmiUntag(R9); // other.length
|
||||
__ movq(R11, Immediate(0)); // i = 0
|
||||
__ LoadImmediate(R11, Immediate(0)); // i = 0
|
||||
|
||||
// do
|
||||
Label loop;
|
||||
|
|
|
@ -4141,8 +4141,8 @@ class Instructions : public Object {
|
|||
static const intptr_t kCheckedEntryOffset = 0;
|
||||
static const intptr_t kUncheckedEntryOffset = 0;
|
||||
#elif defined(TARGET_ARCH_X64)
|
||||
static const intptr_t kCheckedEntryOffset = 16;
|
||||
static const intptr_t kUncheckedEntryOffset = 38;
|
||||
static const intptr_t kCheckedEntryOffset = 15;
|
||||
static const intptr_t kUncheckedEntryOffset = 34;
|
||||
#elif defined(TARGET_ARCH_ARM)
|
||||
static const intptr_t kCheckedEntryOffset = 8;
|
||||
static const intptr_t kUncheckedEntryOffset = 32;
|
||||
|
|
|
@ -671,7 +671,7 @@ void StubCode::GenerateAllocateArrayStub(Assembler* assembler) {
|
|||
__ jmp(&done, Assembler::kNearJump);
|
||||
|
||||
__ Bind(&size_tag_overflow);
|
||||
__ movq(RDI, Immediate(0));
|
||||
__ LoadImmediate(RDI, Immediate(0));
|
||||
__ Bind(&done);
|
||||
|
||||
// Get the class index and insert it into the tags.
|
||||
|
@ -820,7 +820,7 @@ void StubCode::GenerateInvokeDartCodeStub(Assembler* assembler) {
|
|||
Label push_arguments;
|
||||
Label done_push_arguments;
|
||||
__ j(ZERO, &done_push_arguments, Assembler::kNearJump);
|
||||
__ movq(RAX, Immediate(0));
|
||||
__ LoadImmediate(RAX, Immediate(0));
|
||||
__ Bind(&push_arguments);
|
||||
__ pushq(Address(RDX, RAX, TIMES_8, 0));
|
||||
__ incq(RAX);
|
||||
|
@ -925,7 +925,7 @@ void StubCode::GenerateAllocateContextStub(Assembler* assembler) {
|
|||
|
||||
__ Bind(&size_tag_overflow);
|
||||
// Set overflow size tag value.
|
||||
__ movq(R13, Immediate(0));
|
||||
__ LoadImmediate(R13, Immediate(0));
|
||||
|
||||
__ Bind(&done);
|
||||
// RAX: new object.
|
||||
|
|
Loading…
Reference in a new issue