Adds more SIMD instructions to arm64.

R=regis@google.com

Review URL: https://codereview.chromium.org//295243005

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@36585 260f80e4-7a28-3924-810f-c04153c831b5
This commit is contained in:
zra@google.com 2014-05-23 20:34:29 +00:00
parent 1f51fa4413
commit 0676e14a22
9 changed files with 1197 additions and 197 deletions

View file

@ -279,6 +279,7 @@ typedef uintptr_t uword;
const int kWordSize = sizeof(word);
const int kDoubleSize = sizeof(double); // NOLINT
const int kFloatSize = sizeof(float); // NOLINT
const int kQuadSize = 4 * kFloatSize;
const int kSimd128Size = sizeof(simd128_value_t); // NOLINT
#ifdef ARCH_IS_32_BIT
const int kWordSizeLog2 = 2;
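Since a quad (Q) register holds four single-precision floats, kQuadSize and kSimd128Size should both work out to 16 bytes. A minimal standalone sketch of that invariant (the simd128_value_t here is a hypothetical stand-in for the VM's real type):

#include <cassert>

// Hypothetical stand-in for the VM's simd128_value_t (assumed to be 16 bytes).
struct simd128_value_t { float storage[4]; };

const int kFloatSize = sizeof(float);              // NOLINT
const int kQuadSize = 4 * kFloatSize;              // One Q register.
const int kSimd128Size = sizeof(simd128_value_t);  // NOLINT

int main() {
  assert(kQuadSize == 16);
  assert(kSimd128Size == kQuadSize);
  return 0;
}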

View file

@ -442,8 +442,6 @@ class Assembler : public ValueObject {
}
// Logical immediate operations.
void andi(Register rd, Register rn, uint64_t imm) {
Operand imm_op;
const bool immok = Operand::IsImmLogical(imm, kXRegSizeInBits, &imm_op);
@ -734,6 +732,27 @@ class Assembler : public ValueObject {
}
// SIMD operations.
void vand(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VAND, vd, vn, vm);
}
void vorr(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VORR, vd, vn, vm);
}
void veor(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VEOR, vd, vn, vm);
}
void vaddw(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VADDW, vd, vn, vm);
}
void vaddx(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VADDX, vd, vn, vm);
}
void vsubw(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VSUBW, vd, vn, vm);
}
void vsubx(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VSUBX, vd, vn, vm);
}
void vadds(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VADDS, vd, vn, vm);
}
@ -758,18 +777,57 @@ class Assembler : public ValueObject {
void vdivd(VRegister vd, VRegister vn, VRegister vm) {
EmitSIMDThreeSameOp(VDIVD, vd, vn, vm);
}
void vnot(VRegister vd, VRegister vn) {
EmitSIMDTwoRegOp(VNOT, vd, vn);
}
void vabss(VRegister vd, VRegister vn) {
EmitSIMDTwoRegOp(VABSS, vd, vn);
}
void vabsd(VRegister vd, VRegister vn) {
EmitSIMDTwoRegOp(VABSD, vd, vn);
}
void vnegs(VRegister vd, VRegister vn) {
EmitSIMDTwoRegOp(VNEGS, vd, vn);
}
void vnegd(VRegister vd, VRegister vn) {
EmitSIMDTwoRegOp(VNEGD, vd, vn);
}
void vdupw(VRegister vd, Register rn) {
const VRegister vn = static_cast<VRegister>(rn);
EmitSIMDCopyOp(VDUPI, vd, vn, kWord, 0, 0);
}
void vdupx(VRegister vd, Register rn) {
const VRegister vn = static_cast<VRegister>(rn);
EmitSIMDCopyOp(VDUPI, vd, vn, kDoubleWord, 0, 0);
}
void vdups(VRegister vd, VRegister vn, int32_t idx) {
EmitSIMDCopyOp(VDUP, vd, vn, kSWord, 0, idx);
}
void vdupd(VRegister vd, VRegister vn, int32_t idx) {
EmitSIMDCopyOp(VDUP, vd, vn, kDWord, 0, idx);
}
void vinsw(VRegister vd, int32_t didx, Register rn) {
const VRegister vn = static_cast<VRegister>(rn);
EmitSIMDCopyOp(VINSI, vd, vn, kWord, 0, didx);
}
void vinsx(VRegister vd, int32_t didx, Register rn) {
const VRegister vn = static_cast<VRegister>(rn);
EmitSIMDCopyOp(VINSI, vd, vn, kDoubleWord, 0, didx);
}
void vinss(VRegister vd, int32_t didx, VRegister vn, int32_t sidx) {
EmitSIMDCopyOp(VINS, vd, vn, kSWord, sidx, didx);
}
void vinsd(VRegister vd, int32_t didx, VRegister vn, int32_t sidx) {
EmitSIMDCopyOp(VINS, vd, vn, kDWord, sidx, didx);
}
void vmovrs(Register rd, VRegister vn, int32_t sidx) {
const VRegister vd = static_cast<VRegister>(rd);
EmitSIMDCopyOp(VMOVW, vd, vn, kWord, 0, sidx);
}
void vmovrd(Register rd, VRegister vn, int32_t sidx) {
const VRegister vd = static_cast<VRegister>(rd);
EmitSIMDCopyOp(VMOVX, vd, vn, kDoubleWord, 0, sidx);
}
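A note on the naming, with an illustrative snippet (not part of the patch): the w/x suffixes follow the W/X integer register convention (32-bit and 64-bit integer lanes), while s/d follow the S/D convention (single- and double-precision float lanes):

__ vaddw(V0, V1, V2);  // four 32-bit integer lane adds
__ vaddx(V0, V1, V2);  // two 64-bit integer lane adds
__ vadds(V0, V1, V2);  // four single-precision float lane adds
__ vaddd(V0, V1, V2);  // two double-precision float lane adds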
// Aliases.
void mov(Register rd, Register rn) {
@ -779,6 +837,9 @@ class Assembler : public ValueObject {
orr(rd, ZR, Operand(rn));
}
}
void vmov(VRegister vd, VRegister vn) {
vorr(vd, vn, vn);
}
void mvn(Register rd, Register rm) {
orn(rd, ZR, Operand(rm));
}
@ -799,11 +860,23 @@ class Assembler : public ValueObject {
ASSERT(reg != PP); // Only pop PP with PopAndUntagPP().
ldr(reg, Address(SP, 1 * kWordSize, Address::PostIndex));
}
void PushFloat(VRegister reg) {
fstrs(reg, Address(SP, -1 * kFloatSize, Address::PreIndex));
}
void PushDouble(VRegister reg) {
fstrd(reg, Address(SP, -1 * kDoubleSize, Address::PreIndex));
}
void PushQuad(VRegister reg) {
fstrq(reg, Address(SP, -1 * kQuadSize, Address::PreIndex));
}
void PopFloat(VRegister reg) {
fldrs(reg, Address(SP, 1 * kFloatSize, Address::PostIndex));
}
void PopDouble(VRegister reg) {
fldrd(reg, Address(SP, 1 * kDoubleSize, Address::PostIndex));
}
void PopQuad(VRegister reg) {
fldrq(reg, Address(SP, 1 * kQuadSize, Address::PostIndex));
}
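Illustrative usage (hypothetical, not from the patch): spilling a vector register around clobbering code with the new helpers preserves all 128 bits, where PushDouble/PopDouble would keep only the low 64:

__ PushQuad(V0);  // SP -= kQuadSize; stores all 16 bytes of V0.
// ... code that may clobber V0 ...
__ PopQuad(V0);   // Reloads all 16 bytes; SP += kQuadSize.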
void TagAndPushPP() {
// Add the heap object tag back to PP before putting it on the stack.
@ -1464,6 +1537,14 @@ class Assembler : public ValueObject {
Emit(encoding);
}
void EmitSIMDTwoRegOp(SIMDTwoRegOp op, VRegister vd, VRegister vn) {
const int32_t encoding =
op |
(static_cast<int32_t>(vd) << kVdShift) |
(static_cast<int32_t>(vn) << kVnShift);
Emit(encoding);
}
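The emitted word is simply the opcode constant from constants_arm64.h OR'd with the register fields; as a sketch, vnot(V0, V1) assembles to:

// Sketch: VNOT already carries the fixed bits (Q, U, and the opcode);
// EmitSIMDTwoRegOp only inserts the register numbers.
const int32_t encoding = VNOT |
    (static_cast<int32_t>(V0) << kVdShift) |  // destination vector register
    (static_cast<int32_t>(V1) << kVnShift);   // source vector register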
void StoreIntoObjectFilter(Register object, Register value, Label* no_update);
// Shorter filtering sequence that assumes that value is not a smi.

View file

@ -1846,6 +1846,191 @@ ASSEMBLER_TEST_RUN(FldrdFstrdScaledReg, test) {
}
ASSEMBLER_TEST_GENERATE(VinswVmovrs, assembler) {
__ LoadImmediate(R0, 42, kNoPP);
__ LoadImmediate(R1, 43, kNoPP);
__ LoadImmediate(R2, 44, kNoPP);
__ LoadImmediate(R3, 45, kNoPP);
__ vinsw(V0, 0, R0);
__ vinsw(V0, 1, R1);
__ vinsw(V0, 2, R2);
__ vinsw(V0, 3, R3);
__ vmovrs(R4, V0, 0);
__ vmovrs(R5, V0, 1);
__ vmovrs(R6, V0, 2);
__ vmovrs(R7, V0, 3);
__ add(R0, R4, Operand(R5));
__ add(R0, R0, Operand(R6));
__ add(R0, R0, Operand(R7));
__ ret();
}
ASSEMBLER_TEST_RUN(VinswVmovrs, test) {
EXPECT(test != NULL);
typedef int (*Tst)();
EXPECT_EQ(174, EXECUTE_TEST_CODE_INT64(Tst, test->entry()));
}
ASSEMBLER_TEST_GENERATE(VinsxVmovrd, assembler) {
__ LoadImmediate(R0, 42, kNoPP);
__ LoadImmediate(R1, 43, kNoPP);
__ vinsx(V0, 0, R0);
__ vinsx(V0, 1, R1);
__ vmovrd(R2, V0, 0);
__ vmovrd(R3, V0, 1);
__ add(R0, R2, Operand(R3));
__ ret();
}
ASSEMBLER_TEST_RUN(VinsxVmovrd, test) {
EXPECT(test != NULL);
typedef int (*Tst)();
EXPECT_EQ(85, EXECUTE_TEST_CODE_INT64(Tst, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vnot, assembler) {
__ LoadImmediate(R0, 0xfffffffe, kNoPP);
__ LoadImmediate(R1, 0xffffffff, kNoPP);
__ vinsw(V1, 0, R1);
__ vinsw(V1, 1, R0);
__ vinsw(V1, 2, R1);
__ vinsw(V1, 3, R0);
__ vnot(V0, V1);
__ vmovrs(R2, V0, 0);
__ vmovrs(R3, V0, 1);
__ vmovrs(R4, V0, 2);
__ vmovrs(R5, V0, 3);
__ add(R0, R2, Operand(R3));
__ add(R0, R0, Operand(R4));
__ add(R0, R0, Operand(R5));
__ ret();
}
ASSEMBLER_TEST_RUN(Vnot, test) {
EXPECT(test != NULL);
typedef int (*Tst)();
EXPECT_EQ(2, EXECUTE_TEST_CODE_INT64(Tst, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vabss, assembler) {
__ LoadDImmediate(V1, 21.0, kNoPP);
__ LoadDImmediate(V2, -21.0, kNoPP);
__ fcvtsd(V1, V1);
__ fcvtsd(V2, V2);
__ veor(V3, V3, V3);
__ vinss(V3, 1, V1, 0);
__ vinss(V3, 3, V2, 0);
__ vabss(V4, V3);
__ vinss(V5, 0, V4, 1);
__ vinss(V6, 0, V4, 3);
__ fcvtds(V5, V5);
__ fcvtds(V6, V6);
__ faddd(V0, V5, V6);
__ ret();
}
ASSEMBLER_TEST_RUN(Vabss, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42.0, EXECUTE_TEST_CODE_DOUBLE(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vabsd, assembler) {
__ LoadDImmediate(V1, 21.0, kNoPP);
__ LoadDImmediate(V2, -21.0, kNoPP);
__ vinsd(V3, 0, V1, 0);
__ vinsd(V3, 1, V2, 0);
__ vabsd(V4, V3);
__ vinsd(V5, 0, V4, 0);
__ vinsd(V6, 0, V4, 1);
__ faddd(V0, V5, V6);
__ ret();
}
ASSEMBLER_TEST_RUN(Vabsd, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42.0, EXECUTE_TEST_CODE_DOUBLE(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vnegs, assembler) {
__ LoadDImmediate(V1, 42.0, kNoPP);
__ LoadDImmediate(V2, -84.0, kNoPP);
__ fcvtsd(V1, V1);
__ fcvtsd(V2, V2);
__ veor(V3, V3, V3);
__ vinss(V3, 1, V1, 0);
__ vinss(V3, 3, V2, 0);
__ vnegs(V4, V3);
__ vinss(V5, 0, V4, 1);
__ vinss(V6, 0, V4, 3);
__ fcvtds(V5, V5);
__ fcvtds(V6, V6);
__ faddd(V0, V5, V6);
__ ret();
}
ASSEMBLER_TEST_RUN(Vnegs, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42.0, EXECUTE_TEST_CODE_DOUBLE(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vnegd, assembler) {
__ LoadDImmediate(V1, 42.0, kNoPP);
__ LoadDImmediate(V2, -84.0, kNoPP);
__ vinsd(V3, 0, V1, 0);
__ vinsd(V3, 1, V2, 0);
__ vnegd(V4, V3);
__ vinsd(V5, 0, V4, 0);
__ vinsd(V6, 0, V4, 1);
__ faddd(V0, V5, V6);
__ ret();
}
ASSEMBLER_TEST_RUN(Vnegd, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42.0, EXECUTE_TEST_CODE_DOUBLE(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vadds, assembler) {
__ LoadDImmediate(V0, 0.0, kNoPP);
__ LoadDImmediate(V1, 1.0, kNoPP);
@ -1857,21 +2042,17 @@ ASSEMBLER_TEST_GENERATE(Vadds, assembler) {
__ fcvtsd(V2, V2);
__ fcvtsd(V3, V3);
__ vinss(V4, 0, V0, 0);
__ vinss(V4, 1, V1, 0);
__ vinss(V4, 2, V2, 0);
__ vinss(V4, 3, V3, 0);
__ vadds(V5, V4, V4);
__ vinss(V0, 0, V5, 0);
__ vinss(V1, 0, V5, 1);
__ vinss(V2, 0, V5, 2);
__ vinss(V3, 0, V5, 3);
__ fcvtds(V0, V0);
__ fcvtds(V1, V1);
@ -1903,21 +2084,17 @@ ASSEMBLER_TEST_GENERATE(Vsubs, assembler) {
__ fcvtsd(V2, V2);
__ fcvtsd(V3, V3);
__ vinss(V4, 0, V0, 0);
__ vinss(V4, 1, V1, 0);
__ vinss(V4, 2, V2, 0);
__ vinss(V4, 3, V3, 0);
__ vsubs(V5, V5, V4);
__ vinss(V0, 0, V5, 0);
__ vinss(V1, 0, V5, 1);
__ vinss(V2, 0, V5, 2);
__ vinss(V3, 0, V5, 3);
__ fcvtds(V0, V0);
__ fcvtds(V1, V1);
@ -1948,21 +2125,17 @@ ASSEMBLER_TEST_GENERATE(Vmuls, assembler) {
__ fcvtsd(V2, V2);
__ fcvtsd(V3, V3);
__ vinss(V4, 0, V0, 0);
__ vinss(V4, 1, V1, 0);
__ vinss(V4, 2, V2, 0);
__ vinss(V4, 3, V3, 0);
__ vmuls(V5, V4, V4);
__ vinss(V0, 0, V5, 0);
__ vinss(V1, 0, V5, 1);
__ vinss(V2, 0, V5, 2);
__ vinss(V3, 0, V5, 3);
__ fcvtds(V0, V0);
__ fcvtds(V1, V1);
@ -1993,21 +2166,17 @@ ASSEMBLER_TEST_GENERATE(Vdivs, assembler) {
__ fcvtsd(V2, V2);
__ fcvtsd(V3, V3);
__ vinss(V4, 0, V0, 0);
__ vinss(V4, 1, V1, 0);
__ vinss(V4, 2, V2, 0);
__ vinss(V4, 3, V3, 0);
__ vdivs(V5, V4, V4);
__ vinss(V0, 0, V5, 0);
__ vinss(V1, 0, V5, 1);
__ vinss(V2, 0, V5, 2);
__ vinss(V3, 0, V5, 3);
__ fcvtds(V0, V0);
__ fcvtds(V1, V1);
@ -2027,22 +2196,17 @@ ASSEMBLER_TEST_RUN(Vdivs, test) {
}
ASSEMBLER_TEST_GENERATE(Vaddd, assembler) {
__ LoadDImmediate(V0, 2.0, kNoPP);
__ LoadDImmediate(V1, 3.0, kNoPP);
__ vinsd(V4, 0, V0, 0);
__ vinsd(V4, 1, V1, 0);
__ vaddd(V5, V4, V4);
__ vinsd(V0, 0, V5, 0);
__ vinsd(V1, 0, V5, 1);
__ faddd(V0, V0, V1);
__ ret();
@ -2060,17 +2224,13 @@ ASSEMBLER_TEST_GENERATE(Vsubd, assembler) {
__ LoadDImmediate(V1, 3.0, kNoPP);
__ LoadDImmediate(V5, 0.0, kNoPP);
__ vinsd(V4, 0, V0, 0);
__ vinsd(V4, 1, V1, 0);
__ vsubd(V5, V5, V4);
__ vinsd(V0, 0, V5, 0);
__ vinsd(V1, 0, V5, 1);
__ faddd(V0, V0, V1);
__ ret();
@ -2087,17 +2247,13 @@ ASSEMBLER_TEST_GENERATE(Vmuld, assembler) {
__ LoadDImmediate(V0, 2.0, kNoPP);
__ LoadDImmediate(V1, 3.0, kNoPP);
__ vinsd(V4, 0, V0, 0);
__ vinsd(V4, 1, V1, 0);
__ vmuld(V5, V4, V4);
__ vinsd(V0, 0, V5, 0);
__ vinsd(V1, 0, V5, 1);
__ faddd(V0, V0, V1);
__ ret();
@ -2114,17 +2270,13 @@ ASSEMBLER_TEST_GENERATE(Vdivd, assembler) {
__ LoadDImmediate(V0, 2.0, kNoPP);
__ LoadDImmediate(V1, 3.0, kNoPP);
__ vinsd(V4, 0, V0, 0);
__ vinsd(V4, 1, V1, 0);
__ vdivd(V5, V4, V4);
__ vinsd(V0, 0, V5, 0);
__ vinsd(V1, 0, V5, 1);
__ faddd(V0, V0, V1);
__ ret();
@ -2247,6 +2399,209 @@ ASSEMBLER_TEST_RUN(Vinss, test) {
}
ASSEMBLER_TEST_GENERATE(Vand, assembler) {
__ LoadDImmediate(V1, 21.0, kNoPP);
__ LoadImmediate(R0, 0xffffffff, kNoPP);
// V0 <- (0, 0xffffffff, 0, 0xffffffff)
__ fmovdr(V0, R0);
__ vinss(V0, 2, V0, 0);
// V1 <- (21.0, 21.0, 21.0, 21.0)
__ fcvtsd(V1, V1);
__ vdups(V1, V1, 0);
__ vand(V2, V1, V0);
__ vinss(V3, 0, V2, 0);
__ vinss(V4, 0, V2, 1);
__ vinss(V5, 0, V2, 2);
__ vinss(V6, 0, V2, 3);
__ fcvtds(V3, V3);
__ fcvtds(V4, V4);
__ fcvtds(V5, V5);
__ fcvtds(V6, V6);
__ vaddd(V0, V3, V4);
__ vaddd(V0, V0, V5);
__ vaddd(V0, V0, V6);
__ ret();
}
ASSEMBLER_TEST_RUN(Vand, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42.0, EXECUTE_TEST_CODE_DOUBLE(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vorr, assembler) {
__ LoadDImmediate(V1, 10.5, kNoPP);
__ fcvtsd(V1, V1);
// V0 <- (0, 10.5, 0, 10.5)
__ fmovdd(V0, V1);
__ vinss(V0, 2, V0, 0);
// V1 <- (10.5, 0, 10.5, 0)
__ veor(V1, V1, V1);
__ vinss(V1, 1, V0, 0);
__ vinss(V1, 3, V0, 0);
__ vorr(V2, V1, V0);
__ vinss(V3, 0, V2, 0);
__ vinss(V4, 0, V2, 1);
__ vinss(V5, 0, V2, 2);
__ vinss(V6, 0, V2, 3);
__ fcvtds(V3, V3);
__ fcvtds(V4, V4);
__ fcvtds(V5, V5);
__ fcvtds(V6, V6);
__ vaddd(V0, V3, V4);
__ vaddd(V0, V0, V5);
__ vaddd(V0, V0, V6);
__ ret();
}
ASSEMBLER_TEST_RUN(Vorr, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42.0, EXECUTE_TEST_CODE_DOUBLE(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Veor, assembler) {
__ LoadImmediate(R1, 0xffffffff, kNoPP);
__ LoadImmediate(R2, ~21, kNoPP);
__ vinsw(V1, 0, R1);
__ vinsw(V1, 1, R2);
__ vinsw(V1, 2, R1);
__ vinsw(V1, 3, R2);
__ vinsw(V2, 0, R1);
__ vinsw(V2, 1, R1);
__ vinsw(V2, 2, R1);
__ vinsw(V2, 3, R1);
__ veor(V0, V1, V2);
__ vmovrs(R3, V0, 0);
__ vmovrs(R4, V0, 1);
__ vmovrs(R5, V0, 2);
__ vmovrs(R6, V0, 3);
__ add(R0, R3, Operand(R4));
__ add(R0, R0, Operand(R5));
__ add(R0, R0, Operand(R6));
__ ret();
}
ASSEMBLER_TEST_RUN(Veor, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42, EXECUTE_TEST_CODE_INT64(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vaddw, assembler) {
__ LoadImmediate(R4, 21, kNoPP);
__ vdupw(V1, R4);
__ vdupw(V2, R4);
__ vaddw(V0, V1, V2);
__ vmovrs(R0, V0, 0);
__ vmovrs(R1, V0, 1);
__ vmovrs(R2, V0, 2);
__ vmovrs(R3, V0, 3);
__ add(R0, R0, Operand(R1));
__ add(R0, R0, Operand(R2));
__ add(R0, R0, Operand(R3));
__ ret();
}
ASSEMBLER_TEST_RUN(Vaddw, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(168, EXECUTE_TEST_CODE_INT64(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vsubw, assembler) {
__ LoadImmediate(R4, 31, kNoPP);
__ LoadImmediate(R5, 10, kNoPP);
__ vdupw(V1, R4);
__ vdupw(V2, R5);
__ vsubw(V0, V1, V2);
__ vmovrs(R0, V0, 0);
__ vmovrs(R1, V0, 1);
__ vmovrs(R2, V0, 2);
__ vmovrs(R3, V0, 3);
__ add(R0, R0, Operand(R1));
__ add(R0, R0, Operand(R2));
__ add(R0, R0, Operand(R3));
__ ret();
}
ASSEMBLER_TEST_RUN(Vsubw, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(84, EXECUTE_TEST_CODE_INT64(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vaddx, assembler) {
__ LoadImmediate(R4, 21, kNoPP);
__ vdupx(V1, R4);
__ vdupx(V2, R4);
__ vaddx(V0, V1, V2);
__ vmovrd(R0, V0, 0);
__ vmovrd(R1, V0, 1);
__ add(R0, R0, Operand(R1));
__ ret();
}
ASSEMBLER_TEST_RUN(Vaddx, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(84, EXECUTE_TEST_CODE_INT64(SimpleCode, test->entry()));
}
ASSEMBLER_TEST_GENERATE(Vsubx, assembler) {
__ LoadImmediate(R4, 31, kNoPP);
__ LoadImmediate(R5, 10, kNoPP);
__ vdupx(V1, R4);
__ vdupx(V2, R5);
__ vsubx(V0, V1, V2);
__ vmovrd(R0, V0, 0);
__ vmovrd(R1, V0, 1);
__ add(R0, R0, Operand(R1));
__ ret();
}
ASSEMBLER_TEST_RUN(Vsubx, test) {
typedef int (*SimpleCode)();
EXPECT_EQ(42, EXECUTE_TEST_CODE_INT64(SimpleCode, test->entry()));
}
// Called from assembler_test.cc.
// LR: return address.
// R0: context.

View file

@ -457,6 +457,10 @@ enum LogicalShiftOp {
enum SIMDCopyOp {
SIMDCopyMask = 0x9fe08400,
SIMDCopyFixed = DPSimd1Fixed | B10,
VDUPI = SIMDCopyFixed | B30 | B11,
VINSI = SIMDCopyFixed | B30 | B12 | B11,
VMOVW = SIMDCopyFixed | B13 | B12 | B11,
VMOVX = SIMDCopyFixed | B30 | B13 | B12 | B11,
VDUP = SIMDCopyFixed | B30,
VINS = SIMDCopyFixed | B30 | B29,
};
@ -465,6 +469,13 @@ enum SIMDCopyOp {
enum SIMDThreeSameOp {
SIMDThreeSameMask = 0x9f200400,
SIMDThreeSameFixed = DPSimd1Fixed | B21 | B10,
VAND = SIMDThreeSameFixed | B30 | B12 | B11,
VORR = SIMDThreeSameFixed | B30 | B23 | B12 | B11,
VEOR = SIMDThreeSameFixed | B30 | B29 | B12 | B11,
VADDW = SIMDThreeSameFixed | B30 | B23 | B15,
VADDX = SIMDThreeSameFixed | B30 | B23 | B22 | B15,
VSUBW = SIMDThreeSameFixed | B30 | B29 | B23 | B15,
VSUBX = SIMDThreeSameFixed | B30 | B29 | B23 | B22 | B15,
VADDS = SIMDThreeSameFixed | B30 | B15 | B14 | B12,
VADDD = SIMDThreeSameFixed | B30 | B22 | B15 | B14 | B12,
VSUBS = SIMDThreeSameFixed | B30 | B23 | B15 | B14 | B12,
@ -475,6 +486,17 @@ enum SIMDThreeSameOp {
VDIVD = SIMDThreeSameFixed | B30 | B29 | B22 | B15 | B14 | B13 | B12 | B11,
};
// C.3.6.17
enum SIMDTwoRegOp {
SIMDTwoRegMask = 0x9f3e0c00,
SIMDTwoRegFixed = DPSimd1Fixed | B21 | B11,
VNOT = SIMDTwoRegFixed | B30 | B29 | B14 | B12,
VABSS = SIMDTwoRegFixed | B30 | B23 | B15 | B14 | B13 | B12,
VNEGS = SIMDTwoRegFixed | B30 | B29 | B23 | B15 | B14 | B13 | B12,
VABSD = SIMDTwoRegFixed | B30 | B23 | B22 | B15 | B14 | B13 | B12,
VNEGD = SIMDTwoRegFixed | B30 | B29 | B23 | B22 | B15 | B14 | B13 | B12,
};
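As with the other op classes, membership is tested by masking: an instruction word belongs to a class when the bits its Mask selects match its Fixed pattern. A sketch of the predicate (assuming the usual mask/fixed idiom used for the classes listed below):

bool IsSIMDTwoRegOp(int32_t instruction_bits) {
  // All bits selected by the mask must match the fixed pattern.
  return (instruction_bits & SIMDTwoRegMask) == SIMDTwoRegFixed;
}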
// C.3.6.22
enum FPCompareOp {
FPCompareMask = 0xffa0fc07,
@ -552,6 +574,7 @@ _V(MiscDP3Source) \
_V(LogicalShift) \
_V(SIMDCopy) \
_V(SIMDThreeSame) \
_V(SIMDTwoReg) \
_V(FPCompare) \
_V(FPOneSource) \
_V(FPTwoSource) \

View file

@ -541,10 +541,18 @@ int ARM64Decoder::FormatOption(Instr* instr, const char* format) {
if (format[1] == 's') {
ASSERT(STRING_STARTS_WITH(format, "vsz"));
char const* sz_str;
if (instr->Bits(14, 2) == 3) {
switch (instr->Bit(22)) {
case 0: sz_str = "s"; break;
case 1: sz_str = "d"; break;
default: UNREACHABLE(); break;
}
} else {
sz_str = "f64";
switch (instr->Bit(22)) {
case 0: sz_str = "w"; break;
case 1: sz_str = "x"; break;
default: UNREACHABLE(); break;
}
}
buffer_pos_ += OS::SNPrint(current_position_in_buffer(),
remaining_size_in_buffer(),
@ -1028,8 +1036,18 @@ void ARM64Decoder::DecodeSIMDCopy(Instr* instr) {
const int32_t op = instr->Bit(29);
const int32_t imm4 = instr->Bits(11, 4);
if ((op == 0) && (imm4 == 7)) {
if (Q == 0) {
Format(instr, "vmovrs 'rd, 'vn'idx5");
} else {
Format(instr, "vmovrd 'rd, 'vn'idx5");
}
} else if ((Q == 1) && (op == 0) && (imm4 == 0)) {
Format(instr, "vdup'csz 'vd, 'vn'idx5");
} else if ((Q == 1) && (op == 0) && (imm4 == 3)) {
Format(instr, "vins'csz 'vd'idx5, 'rn");
} else if ((Q == 1) && (op == 0) && (imm4 == 1)) {
Format(instr, "vdup'csz 'vd, 'rn");
} else if ((Q == 1) && (op == 1)) {
Format(instr, "vins'csz 'vd'idx5, 'vn'idx4");
} else {
@ -1048,7 +1066,19 @@ void ARM64Decoder::DecodeSIMDThreeSame(Instr* instr) {
return;
}
if ((U == 0) && (opcode == 0x3)) {
if (instr->Bit(23) == 0) {
Format(instr, "vand 'vd, 'vn, 'vm");
} else {
Format(instr, "vorr 'vd, 'vn, 'vm");
}
} else if ((U == 1) && (opcode == 0x3)) {
Format(instr, "veor 'vd, 'vn, 'vm");
} else if ((U == 0) && (opcode == 0x10)) {
Format(instr, "vadd'vsz 'vd, 'vn, 'vm");
} else if ((U == 1) && (opcode == 0x10)) {
Format(instr, "vsub'vsz 'vd, 'vn, 'vm");
} else if ((U == 0) && (opcode == 0x1a)) {
if (instr->Bit(23) == 0) {
Format(instr, "vadd'vsz 'vd, 'vn, 'vm");
} else {
@ -1064,11 +1094,48 @@ void ARM64Decoder::DecodeSIMDThreeSame(Instr* instr) {
}
void ARM64Decoder::DecodeSIMDTwoReg(Instr* instr) {
const int32_t Q = instr->Bit(30);
const int32_t U = instr->Bit(29);
const int32_t op = instr->Bits(12, 5);
const int32_t sz = instr->Bits(22, 2);
if (Q == 0) {
Unknown(instr);
return;
}
if ((U == 1) && (op == 0x5)) {
Format(instr, "vnot 'vd, 'vn");
} else if ((U == 0) && (op == 0xf)) {
if (sz == 2) {
Format(instr, "vabss 'vd, 'vn");
} else if (sz == 3) {
Format(instr, "vabsd 'vd, 'vn");
} else {
Unknown(instr);
}
} else if ((U == 1) && (op == 0xf)) {
if (sz == 2) {
Format(instr, "vnegs 'vd, 'vn");
} else if (sz == 3) {
Format(instr, "vnegd 'vd, 'vn");
} else {
Unknown(instr);
}
} else {
Unknown(instr);
}
}
void ARM64Decoder::DecodeDPSimd1(Instr* instr) {
if (instr->IsSIMDCopyOp()) {
DecodeSIMDCopy(instr);
} else if (instr->IsSIMDThreeSameOp()) {
DecodeSIMDThreeSame(instr);
} else if (instr->IsSIMDTwoRegOp()) {
DecodeSIMDTwoReg(instr);
} else {
Unknown(instr);
}

View file

@ -1440,8 +1440,7 @@ void FlowGraphCompiler::SaveLiveRegisters(LocationSummary* locs) {
reg_idx >= 0; --reg_idx) {
VRegister fpu_reg = static_cast<VRegister>(reg_idx);
if (locs->live_registers()->ContainsFpuRegister(fpu_reg)) {
__ PushQuad(fpu_reg);
}
}
}
@ -1473,8 +1472,7 @@ void FlowGraphCompiler::RestoreLiveRegisters(LocationSummary* locs) {
for (intptr_t reg_idx = 0; reg_idx < kNumberOfVRegisters; ++reg_idx) {
VRegister fpu_reg = static_cast<VRegister>(reg_idx);
if (locs->live_registers()->ContainsFpuRegister(fpu_reg)) {
__ PopQuad(fpu_reg);
}
}
}
@ -1559,7 +1557,7 @@ void ParallelMoveResolver::EmitMove(int index) {
}
} else if (source.IsFpuRegister()) {
if (destination.IsFpuRegister()) {
__ vmov(destination.fpu_reg(), source.fpu_reg());
} else {
if (destination.IsDoubleStackSlot()) {
const intptr_t dest_offset = destination.ToStackSlotOffset();
@ -1567,7 +1565,8 @@ void ParallelMoveResolver::EmitMove(int index) {
__ StoreDToOffset(src, FP, dest_offset, PP);
} else {
ASSERT(destination.IsQuadStackSlot());
const intptr_t dest_offset = destination.ToStackSlotOffset();
__ StoreQToOffset(source.fpu_reg(), FP, dest_offset, PP);
}
}
} else if (source.IsDoubleStackSlot()) {
@ -1583,7 +1582,16 @@ void ParallelMoveResolver::EmitMove(int index) {
__ StoreDToOffset(VTMP, FP, dest_offset, PP);
}
} else if (source.IsQuadStackSlot()) {
if (destination.IsFpuRegister()) {
const intptr_t source_offset = source.ToStackSlotOffset();
__ LoadQFromOffset(destination.fpu_reg(), FP, source_offset, PP);
} else {
ASSERT(destination.IsQuadStackSlot());
const intptr_t source_offset = source.ToStackSlotOffset();
const intptr_t dest_offset = destination.ToStackSlotOffset();
__ LoadQFromOffset(VTMP, FP, source_offset, PP);
__ StoreQToOffset(VTMP, FP, dest_offset, PP);
}
} else {
ASSERT(source.IsConstant());
const Object& constant = source.constant();

View file

@ -3900,6 +3900,10 @@ void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const DRegister dvalue0 = EvenDRegisterOf(value);
const DRegister dvalue1 = OddDRegisterOf(value);
const SRegister svalue0 = EvenSRegisterOf(dvalue0);
const SRegister svalue1 = OddSRegisterOf(dvalue0);
const SRegister svalue2 = EvenSRegisterOf(dvalue1);
const SRegister svalue3 = OddSRegisterOf(dvalue1);
const DRegister dtemp0 = DTMP;
const DRegister dtemp1 = OddDRegisterOf(QTMP);
@ -3909,20 +3913,16 @@ void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
switch (op_kind()) {
case MethodRecognizer::kFloat32x4ShuffleX:
__ vcvtds(dresult0, svalue0);
break;
case MethodRecognizer::kFloat32x4ShuffleY:
__ vcvtds(dresult0, svalue1);
break;
case MethodRecognizer::kFloat32x4ShuffleZ:
__ vcvtds(dresult0, svalue2);
break;
case MethodRecognizer::kFloat32x4ShuffleW:
__ vcvtds(dresult0, svalue3);
break;
case MethodRecognizer::kInt32x4Shuffle:
case MethodRecognizer::kFloat32x4Shuffle:
@ -4919,24 +4919,11 @@ void BinaryInt32x4OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const QRegister right = locs()->in(1).fpu_reg();
const QRegister result = locs()->out(0).fpu_reg();
switch (op_kind()) {
case Token::kBIT_AND: __ vandq(result, left, right); break;
case Token::kBIT_OR: __ vorrq(result, left, right); break;
case Token::kBIT_XOR: __ veorq(result, left, right); break;
case Token::kADD: __ vaddqi(kWord, result, left, right); break;
case Token::kSUB: __ vsubqi(kWord, result, left, right); break;
default: UNREACHABLE();
}
}

View file

@ -3426,37 +3426,125 @@ void BinaryFloat64x2OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister value = locs()->in(0).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
switch (op_kind()) {
case MethodRecognizer::kFloat32x4ShuffleX:
__ vinss(result, 0, value, 0);
__ fcvtds(result, result);
break;
case MethodRecognizer::kFloat32x4ShuffleY:
__ vinss(result, 0, value, 1);
__ fcvtds(result, result);
break;
case MethodRecognizer::kFloat32x4ShuffleZ:
__ vinss(result, 0, value, 2);
__ fcvtds(result, result);
break;
case MethodRecognizer::kFloat32x4ShuffleW:
__ vinss(result, 0, value, 3);
__ fcvtds(result, result);
break;
case MethodRecognizer::kInt32x4Shuffle:
case MethodRecognizer::kFloat32x4Shuffle:
if (mask_ == 0x00) {
__ vdups(result, value, 0);
} else if (mask_ == 0x55) {
__ vdups(result, value, 1);
} else if (mask_ == 0xAA) {
__ vdups(result, value, 2);
} else if (mask_ == 0xFF) {
__ vdups(result, value, 3);
} else {
__ vinss(result, 0, value, mask_ & 0x3);
__ vinss(result, 1, value, (mask_ >> 2) & 0x3);
__ vinss(result, 2, value, (mask_ >> 4) & 0x3);
__ vinss(result, 3, value, (mask_ >> 6) & 0x3);
}
break;
default: UNREACHABLE();
}
}
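The 8-bit mask_ packs four 2-bit source-lane indices, bits 1:0 selecting the source lane for result lane 0 and so on, which is why 0x00, 0x55, 0xAA, and 0xFF degenerate to single-lane broadcasts. A small sketch of the lane arithmetic (illustrative, not from the patch):

#include <cstdint>

// Which source lane feeds a given result lane under an 8-bit shuffle mask.
int SourceLane(uint8_t mask, int result_lane) {
  return (mask >> (2 * result_lane)) & 0x3;
}
// SourceLane(0x55, i) == 1 for every i, so mask 0x55 is vdups(result, value, 1);
// SourceLane(0x1B, i) for i = 0..3 gives 3, 2, 1, 0: a full lane reversal.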
LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_in(1, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister left = locs()->in(0).fpu_reg();
const VRegister right = locs()->in(1).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
switch (op_kind()) {
case MethodRecognizer::kFloat32x4ShuffleMix:
case MethodRecognizer::kInt32x4ShuffleMix:
__ vinss(result, 0, left, mask_ & 0x3);
__ vinss(result, 1, left, (mask_ >> 2) & 0x3);
__ vinss(result, 2, right, (mask_ >> 4) & 0x3);
__ vinss(result, 3, right, (mask_ >> 6) & 0x3);
break;
default: UNREACHABLE();
}
}
LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 1;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_temp(0, Location::RequiresRegister());
summary->set_out(0, Location::RequiresRegister());
return summary;
}
void Simd32x4GetSignMaskInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister value = locs()->in(0).fpu_reg();
const Register out = locs()->out(0).reg();
const Register temp = locs()->temp(0).reg();
// X lane.
__ vmovrs(out, value, 0);
__ Lsr(out, out, 31);
// Y lane.
__ vmovrs(temp, value, 1);
__ Lsr(temp, temp, 31);
__ orr(out, out, Operand(temp, LSL, 1));
// Z lane.
__ vmovrs(temp, value, 2);
__ Lsr(temp, temp, 31);
__ orr(out, out, Operand(temp, LSL, 2));
// W lane.
__ vmovrs(temp, value, 3);
__ Lsr(temp, temp, 31);
__ orr(out, out, Operand(temp, LSL, 3));
// Tag.
__ SmiTag(out);
}
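Each vmovrs/Lsr pair extracts one lane's IEEE sign bit (bit 31), and the orrs pack the four bits into a nibble before Smi-tagging. A scalar sketch of the same computation (values illustrative):

#include <cstdint>
#include <cstring>

// Scalar model of the emitted sign-mask sequence.
uint32_t SignMask(const float lanes[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; i++) {
    uint32_t bits;
    memcpy(&bits, &lanes[i], sizeof(bits));  // vmovrs: raw lane bits.
    mask |= (bits >> 31) << i;               // Lsr #31, then orr with LSL #i.
  }
  return mask;  // e.g. {-1.0f, 2.0f, -3.0f, 4.0f} gives 0b0101 == 5.
}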
@ -3482,14 +3570,14 @@ void Float32x4ConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister v3 = locs()->in(3).fpu_reg();
const VRegister r = locs()->out(0).fpu_reg();
__ fcvtsd(VTMP, v0);
__ vinss(r, 0, VTMP, 0);
__ fcvtsd(VTMP, v1);
__ vinss(r, 1, VTMP, 0);
__ fcvtsd(VTMP, v2);
__ vinss(r, 2, VTMP, 0);
__ fcvtsd(VTMP, v3);
__ vinss(r, 3, VTMP, 0);
}
@ -3506,7 +3594,7 @@ LocationSummary* Float32x4ZeroInstr::MakeLocationSummary(Isolate* isolate,
void Float32x4ZeroInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister v = locs()->out(0).fpu_reg();
__ veor(v, v, v);
}
@ -3601,13 +3689,29 @@ void Float32x4ScaleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
LocationSummary* Float32x4ZeroArgInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Float32x4ZeroArgInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister left = locs()->in(0).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
switch (op_kind()) {
case MethodRecognizer::kFloat32x4Negate:
__ vnegs(result, left);
break;
case MethodRecognizer::kFloat32x4Absolute:
__ vabss(result, left);
break;
default: UNREACHABLE();
}
}
@ -3625,25 +3729,64 @@ void Float32x4ClampInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
LocationSummary* Float32x4WithInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_in(1, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Float32x4WithInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister replacement = locs()->in(0).fpu_reg();
const VRegister value = locs()->in(1).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
__ fcvtsd(VTMP, replacement);
if (result != value) {
__ vmov(result, value);
}
switch (op_kind()) {
case MethodRecognizer::kFloat32x4WithX:
__ vinss(result, 0, VTMP, 0);
break;
case MethodRecognizer::kFloat32x4WithY:
__ vinss(result, 1, VTMP, 0);
break;
case MethodRecognizer::kFloat32x4WithZ:
__ vinss(result, 2, VTMP, 0);
break;
case MethodRecognizer::kFloat32x4WithW:
__ vinss(result, 3, VTMP, 0);
break;
default: UNREACHABLE();
}
}
LocationSummary* Float32x4ToInt32x4Instr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Float32x4ToInt32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister value = locs()->in(0).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
if (value != result) {
__ vmov(result, value);
}
}
@ -3688,7 +3831,7 @@ LocationSummary* Float64x2ZeroInstr::MakeLocationSummary(Isolate* isolate,
void Float64x2ZeroInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister v = locs()->out(0).fpu_reg();
__ veor(v, v, v);
}
@ -3735,25 +3878,57 @@ void Float64x2ConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
LocationSummary* Float64x2ToFloat32x4Instr::MakeLocationSummary(
Isolate* isolate, bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Float64x2ToFloat32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister q = locs()->in(0).fpu_reg();
const VRegister r = locs()->out(0).fpu_reg();
// Zero register.
__ veor(r, r, r);
// Set X lane.
__ vinsd(VTMP, 0, q, 0);
__ fcvtsd(VTMP, VTMP);
__ vinss(r, 0, VTMP, 0);
// Set Y lane.
__ vinsd(VTMP, 0, q, 1);
__ fcvtsd(VTMP, VTMP);
__ vinss(r, 1, VTMP, 0);
}
LocationSummary* Float32x4ToFloat64x2Instr::MakeLocationSummary(
Isolate* isolate, bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Float32x4ToFloat64x2Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister q = locs()->in(0).fpu_reg();
const VRegister r = locs()->out(0).fpu_reg();
// Set X.
__ vinss(VTMP, 0, q, 0);
__ fcvtds(VTMP, VTMP);
__ vinsd(r, 0, VTMP, 0);
// Set Y.
__ vinss(VTMP, 0, q, 1);
__ fcvtds(VTMP, VTMP);
__ vinsd(r, 1, VTMP, 0);
}
@ -3771,85 +3946,260 @@ void Float64x2ZeroArgInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
LocationSummary* Float64x2OneArgInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_in(1, Location::RequiresFpuRegister());
summary->set_out(0, Location::SameAsFirstInput());
return summary;
}
void Float64x2OneArgInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister left = locs()->in(0).fpu_reg();
const VRegister right = locs()->in(1).fpu_reg();
const VRegister out = locs()->out(0).fpu_reg();
ASSERT(left == out);
switch (op_kind()) {
case MethodRecognizer::kFloat64x2Scale:
__ vmuld(out, left, right);
break;
case MethodRecognizer::kFloat64x2WithX:
__ vinsd(out, 0, right, 0);
break;
case MethodRecognizer::kFloat64x2WithY:
__ vinsd(out, 1, right, 0);
break;
case MethodRecognizer::kFloat64x2Min: {
UNIMPLEMENTED();
break;
}
case MethodRecognizer::kFloat64x2Max: {
UNIMPLEMENTED();
break;
}
default: UNREACHABLE();
}
}
LocationSummary* Int32x4BoolConstructorInstr::MakeLocationSummary(
Isolate* isolate, bool opt) const {
const intptr_t kNumInputs = 4;
const intptr_t kNumTemps = 1;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresRegister());
summary->set_in(1, Location::RequiresRegister());
summary->set_in(2, Location::RequiresRegister());
summary->set_in(3, Location::RequiresRegister());
summary->set_temp(0, Location::RequiresRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Int32x4BoolConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const Register v0 = locs()->in(0).reg();
const Register v1 = locs()->in(1).reg();
const Register v2 = locs()->in(2).reg();
const Register v3 = locs()->in(3).reg();
const Register temp = locs()->temp(0).reg();
const VRegister result = locs()->out(0).fpu_reg();
__ veor(result, result, result);
__ LoadImmediate(temp, 0xffffffff, PP);
__ LoadObject(TMP2, Bool::True(), PP);
// __ CompareObject(v0, Bool::True(), PP);
__ CompareRegisters(v0, TMP2);
__ csel(TMP, temp, ZR, EQ);
__ vinsw(result, 0, TMP);
// __ CompareObject(v1, Bool::True(), PP);
__ CompareRegisters(v1, TMP2);
__ csel(TMP, temp, ZR, EQ);
__ vinsw(result, 1, TMP);
// __ CompareObject(v2, Bool::True(), PP);
__ CompareRegisters(v2, TMP2);
__ csel(TMP, temp, ZR, EQ);
__ vinsw(result, 2, TMP);
// __ CompareObject(v3, Bool::True(), PP);
__ CompareRegisters(v3, TMP2);
__ csel(TMP, temp, ZR, EQ);
__ vinsw(result, 3, TMP);
}
LocationSummary* Int32x4GetFlagInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresRegister());
return summary;
}
void Int32x4GetFlagInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister value = locs()->in(0).fpu_reg();
const Register result = locs()->out(0).reg();
switch (op_kind()) {
case MethodRecognizer::kInt32x4GetFlagX:
__ vmovrs(result, value, 0);
break;
case MethodRecognizer::kInt32x4GetFlagY:
__ vmovrs(result, value, 1);
break;
case MethodRecognizer::kInt32x4GetFlagZ:
__ vmovrs(result, value, 2);
break;
case MethodRecognizer::kInt32x4GetFlagW:
__ vmovrs(result, value, 3);
break;
default: UNREACHABLE();
}
__ tst(result, Operand(result));
__ LoadObject(result, Bool::True(), PP);
__ LoadObject(TMP, Bool::False(), PP);
__ csel(result, TMP, result, EQ);
}
LocationSummary* Int32x4SelectInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 3;
const intptr_t kNumTemps = 1;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_in(1, Location::RequiresFpuRegister());
summary->set_in(2, Location::RequiresFpuRegister());
summary->set_temp(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Int32x4SelectInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister mask = locs()->in(0).fpu_reg();
const VRegister trueValue = locs()->in(1).fpu_reg();
const VRegister falseValue = locs()->in(2).fpu_reg();
const VRegister out = locs()->out(0).fpu_reg();
const VRegister temp = locs()->temp(0).fpu_reg();
// Copy mask.
__ vmov(temp, mask);
// Invert it.
__ vnot(temp, temp);
// mask = mask & trueValue.
__ vand(mask, mask, trueValue);
// temp = temp & falseValue.
__ vand(temp, temp, falseValue);
// out = mask | temp.
__ vorr(out, mask, temp);
}
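This is the standard branch-free bitwise select, out = (mask & trueValue) | (~mask & falseValue), applied across all 128 bits. A scalar sketch for one 32-bit lane:

#include <cstdint>

// Scalar model of the vnot/vand/vand/vorr sequence for a single lane.
uint32_t Select(uint32_t mask, uint32_t true_value, uint32_t false_value) {
  return (mask & true_value) | (~mask & false_value);
}
// A lane of all ones selects true_value; a lane of zeros selects false_value,
// matching Int32x4.select semantics.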
LocationSummary* Int32x4SetFlagInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_in(1, Location::RequiresRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Int32x4SetFlagInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister mask = locs()->in(0).fpu_reg();
const Register flag = locs()->in(1).reg();
const VRegister result = locs()->out(0).fpu_reg();
if (result != mask) {
__ vmov(result, mask);
}
__ CompareObject(flag, Bool::True(), PP);
__ LoadImmediate(TMP, 0xffffffff, PP);
__ csel(TMP, TMP, ZR, EQ);
switch (op_kind()) {
case MethodRecognizer::kInt32x4WithFlagX:
__ vinsw(result, 0, TMP);
break;
case MethodRecognizer::kInt32x4WithFlagY:
__ vinsw(result, 1, TMP);
break;
case MethodRecognizer::kInt32x4WithFlagZ:
__ vinsw(result, 2, TMP);
break;
case MethodRecognizer::kInt32x4WithFlagW:
__ vinsw(result, 3, TMP);
break;
default: UNREACHABLE();
}
}
LocationSummary* Int32x4ToFloat32x4Instr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void Int32x4ToFloat32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister value = locs()->in(0).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
if (value != result) {
__ vmov(result, value);
}
}
LocationSummary* BinaryInt32x4OpInstr::MakeLocationSummary(Isolate* isolate,
bool opt) const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary = new LocationSummary(
isolate, kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresFpuRegister());
summary->set_in(1, Location::RequiresFpuRegister());
summary->set_out(0, Location::RequiresFpuRegister());
return summary;
}
void BinaryInt32x4OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
const VRegister left = locs()->in(0).fpu_reg();
const VRegister right = locs()->in(1).fpu_reg();
const VRegister result = locs()->out(0).fpu_reg();
switch (op_kind()) {
case Token::kBIT_AND: __ vand(result, left, right); break;
case Token::kBIT_OR: __ vorr(result, left, right); break;
case Token::kBIT_XOR: __ veor(result, left, right); break;
case Token::kADD: __ vaddw(result, left, right); break;
case Token::kSUB: __ vsubw(result, left, right); break;
default: UNREACHABLE();
}
}

View file

@ -2205,7 +2205,17 @@ void Simulator::DecodeSIMDCopy(Instr* instr) {
const VRegister vd = instr->VdField();
const VRegister vn = instr->VnField();
const Register rn = instr->RnField();
const Register rd = instr->RdField();
if ((op == 0) && (imm4 == 7)) {
if (Q == 0) {
// Format(instr, "vmovrs 'rd, 'vn'idx5");
set_wregister(rd, get_vregisters(vn, idx5), R31IsZR);
} else {
// Format(instr, "vmovrd 'rd, 'vn'idx5");
set_register(rd, get_vregisterd(vn, idx5), R31IsZR);
}
} else if ((Q == 1) && (op == 0) && (imm4 == 0)) {
// Format(instr, "vdup'csz 'vd, 'vn'idx5");
if (element_bytes == 4) {
for (int i = 0; i < 4; i++) {
@ -2219,6 +2229,29 @@ void Simulator::DecodeSIMDCopy(Instr* instr) {
UnimplementedInstruction(instr);
return;
}
} else if ((Q == 1) && (op == 0) && (imm4 == 3)) {
// Format(instr, "vins'csz 'vd'idx5, 'rn");
if (element_bytes == 4) {
set_vregisters(vd, idx5, get_wregister(rn, R31IsZR));
} else if (element_bytes == 8) {
set_vregisterd(vd, idx5, get_register(rn, R31IsZR));
} else {
UnimplementedInstruction(instr);
}
} else if ((Q == 1) && (op == 0) && (imm4 == 1)) {
// Format(instr, "vdup'csz 'vd, 'rn");
if (element_bytes == 4) {
for (int i = 0; i < 4; i++) {
set_vregisters(vd, i, get_wregister(rn, R31IsZR));
}
} else if (element_bytes == 8) {
for (int i = 0; i < 2; i++) {
set_vregisterd(vd, i, get_register(rn, R31IsZR));
}
} else {
UnimplementedInstruction(instr);
return;
}
} else if ((Q == 1) && (op == 1)) {
// Format(instr, "vins'csz 'vd'idx5, 'vn'idx4");
if (element_bytes == 4) {
@ -2250,64 +2283,159 @@ void Simulator::DecodeSIMDThreeSame(Instr* instr) {
if (instr->Bit(22) == 0) {
// f32 case.
for (int idx = 0; idx < 4; idx++) {
const int32_t vn_val = get_vregisters(vn, idx);
const int32_t vm_val = get_vregisters(vm, idx);
const float vn_flt = bit_cast<float, int32_t>(vn_val);
const float vm_flt = bit_cast<float, int32_t>(vm_val);
int32_t res = 0;
if ((U == 0) && (opcode == 0x3)) {
if (instr->Bit(23) == 0) {
// Format(instr, "vand 'vd, 'vn, 'vm");
res = vn_val & vm_val;
} else {
// Format(instr, "vorr 'vd, 'vn, 'vm");
res = vn_val | vm_val;
}
} else if ((U == 1) && (opcode == 0x3)) {
// Format(instr, "veor 'vd, 'vn, 'vm");
res = vn_val ^ vm_val;
} else if ((U == 0) && (opcode == 0x10)) {
// Format(instr, "vadd'vsz 'vd, 'vn, 'vm");
res = vn_val + vm_val;
} else if ((U == 1) && (opcode == 0x10)) {
// Format(instr, "vsub'vsz 'vd, 'vn, 'vm");
res = vn_val - vm_val;
} else if ((U == 0) && (opcode == 0x1a)) {
if (instr->Bit(23) == 0) {
// Format(instr, "vadd'vsz 'vd, 'vn, 'vm");
res = bit_cast<int32_t, float>(vn_flt + vm_flt);
} else {
// Format(instr, "vsub'vsz 'vd, 'vn, 'vm");
res = bit_cast<int32_t, float>(vn_flt - vm_flt);
}
} else if ((U == 1) && (opcode == 0x1b)) {
// Format(instr, "vmul'vsz 'vd, 'vn, 'vm");
res = bit_cast<int32_t, float>(vn_flt * vm_flt);
} else if ((U == 1) && (opcode == 0x1f)) {
// Format(instr, "vdiv'vsz 'vd, 'vn, 'vm");
res = bit_cast<int32_t, float>(vn_flt / vm_flt);
} else {
UnimplementedInstruction(instr);
return;
}
set_vregisters(vd, idx, res);
}
} else {
// f64 case.
for (int idx = 0; idx < 2; idx++) {
const int64_t vn_val = get_vregisterd(vn, idx);
const int64_t vm_val = get_vregisterd(vm, idx);
const double vn_dbl = bit_cast<double, int64_t>(vn_val);
const double vm_dbl = bit_cast<double, int64_t>(vm_val);
int64_t res = 0;
if ((U == 0) && (opcode == 0x3)) {
if (instr->Bit(23) == 0) {
// Format(instr, "vand 'vd, 'vn, 'vm");
res = vn_val & vm_val;
} else {
// Format(instr, "vorr 'vd, 'vn, 'vm");
res = vn_val | vm_val;
}
} else if ((U == 1) && (opcode == 0x3)) {
// Format(instr, "veor 'vd, 'vn, 'vm");
res = vn_val ^ vm_val;
} else if ((U == 0) && (opcode == 0x10)) {
// Format(instr, "vadd'vsz 'vd, 'vn, 'vm");
res = vn_val + vm_val;
} else if ((U == 1) && (opcode == 0x10)) {
// Format(instr, "vsub'vsz 'vd, 'vn, 'vm");
res = vn_val - vm_val;
} else if ((U == 0) && (opcode == 0x1a)) {
if (instr->Bit(23) == 0) {
// Format(instr, "vadd'vsz 'vd, 'vn, 'vm");
res = bit_cast<int64_t, double>(vn_dbl + vm_dbl);
} else {
// Format(instr, "vsub'vsz 'vd, 'vn, 'vm");
res = bit_cast<int64_t, double>(vn_dbl - vm_dbl);
}
} else if ((U == 1) && (opcode == 0x1b)) {
// Format(instr, "vmul'vsz 'vd, 'vn, 'vm");
res = bit_cast<int64_t, double>(vn_dbl * vm_dbl);
} else if ((U == 1) && (opcode == 0x1f)) {
// Format(instr, "vdiv'vsz 'vd, 'vn, 'vm");
res = bit_cast<int64_t, double>(vn_dbl / vm_dbl);
} else {
UnimplementedInstruction(instr);
return;
}
set_vregisterd(vd, idx, res);
}
}
}
void Simulator::DecodeSIMDTwoReg(Instr* instr) {
const int32_t Q = instr->Bit(30);
const int32_t U = instr->Bit(29);
const int32_t op = instr->Bits(12, 5);
const int32_t sz = instr->Bits(22, 2);
const VRegister vd = instr->VdField();
const VRegister vn = instr->VnField();
if ((Q == 1) && (U == 1) && (op == 5)) {
// Format(instr, "vnot 'vd, 'vn");
for (int i = 0; i < 2; i++) {
set_vregisterd(vd, i, ~get_vregisterd(vn, i));
}
} else if ((U == 0) && (op == 0xf)) {
if (sz == 2) {
// Format(instr, "vabss 'vd, 'vn");
for (int i = 0; i < 4; i++) {
const int32_t vn_val = get_vregisters(vn, i);
const float vn_flt = bit_cast<float, int32_t>(vn_val);
set_vregisters(vd, i, bit_cast<int32_t, float>(fabsf(vn_flt)));
}
} else if (sz == 3) {
// Format(instr, "vabsd 'vd, 'vn");
for (int i = 0; i < 2; i++) {
const int64_t vn_val = get_vregisterd(vn, i);
const double vn_dbl = bit_cast<double, int64_t>(vn_val);
set_vregisterd(vd, i, bit_cast<int64_t, double>(fabs(vn_dbl)));
}
} else {
UnimplementedInstruction(instr);
}
} else if ((U == 1) && (op == 0xf)) {
if (sz == 2) {
// Format(instr, "vnegs 'vd, 'vn");
for (int i = 0; i < 4; i++) {
const int32_t vn_val = get_vregisters(vn, i);
const float vn_flt = bit_cast<float, int32_t>(vn_val);
set_vregisters(vd, i, bit_cast<int32_t, float>(-vn_flt));
}
} else if (sz == 3) {
// Format(instr, "vnegd 'vd, 'vn");
for (int i = 0; i < 2; i++) {
const int64_t vn_val = get_vregisterd(vn, i);
const double vn_dbl = bit_cast<double, int64_t>(vn_val);
set_vregisterd(vd, i, bit_cast<int64_t, double>(-vn_dbl));
}
} else {
UnimplementedInstruction(instr);
}
} else {
UnimplementedInstruction(instr);
}
}
void Simulator::DecodeDPSimd1(Instr* instr) {
if (instr->IsSIMDCopyOp()) {
DecodeSIMDCopy(instr);
} else if (instr->IsSIMDThreeSameOp()) {
DecodeSIMDThreeSame(instr);
} else if (instr->IsSIMDTwoRegOp()) {
DecodeSIMDTwoReg(instr);
} else {
UnimplementedInstruction(instr);
}