SIMD plumbing

Review URL: https://codereview.chromium.org//12871015

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@20287 260f80e4-7a28-3924-810f-c04153c831b5
This commit is contained in:
johnmccutchan@google.com 2013-03-20 20:29:00 +00:00
parent 6cd9cee512
commit 3d4778e2c2
17 changed files with 499 additions and 190 deletions

View file

@ -252,12 +252,12 @@ DEFINE_NATIVE_ENTRY(ByteArray_setUint64, 3) {
DEFINE_NATIVE_ENTRY(ByteArray_getFloat32x4, 2) {
UNALIGNED_GETTER(ByteArray, Float32x4, simd_value_t);
UNALIGNED_GETTER(ByteArray, Float32x4, simd128_value_t);
}
DEFINE_NATIVE_ENTRY(ByteArray_setFloat32x4, 3) {
UNALIGNED_SETTER(ByteArray, Float32x4, value, simd_value_t);
UNALIGNED_SETTER(ByteArray, Float32x4, value, simd128_value_t);
}
@ -621,7 +621,8 @@ DEFINE_NATIVE_ENTRY(Float32x4List_newTransferable, 1) {
GET_NON_NULL_NATIVE_ARGUMENT(Smi, length, arguments->NativeArgAt(0));
intptr_t len = length.Value();
LengthCheck(len, Float32x4Array::kMaxElements);
simd_value_t* bytes = OS::AllocateAlignedArray<simd_value_t>(len, kAlignment);
simd128_value_t* bytes =
OS::AllocateAlignedArray<simd128_value_t>(len, kAlignment);
const ExternalFloat32x4Array& obj =
ExternalFloat32x4Array::Handle(ExternalFloat32x4Array::New(bytes, len));
obj.AddFinalizer(bytes, PeerFinalizer);
@ -630,12 +631,12 @@ DEFINE_NATIVE_ENTRY(Float32x4List_newTransferable, 1) {
DEFINE_NATIVE_ENTRY(Float32x4Array_getIndexed, 2) {
GETTER(Float32x4Array, Float32x4, simd_value_t);
GETTER(Float32x4Array, Float32x4, simd128_value_t);
}
DEFINE_NATIVE_ENTRY(Float32x4Array_setIndexed, 3) {
SETTER(Float32x4Array, Float32x4, value, simd_value_t);
SETTER(Float32x4Array, Float32x4, value, simd128_value_t);
}
@ -816,12 +817,12 @@ DEFINE_NATIVE_ENTRY(ExternalUint64Array_setIndexed, 3) {
// ExternalFloat32x4Array
DEFINE_NATIVE_ENTRY(ExternalFloat32x4Array_getIndexed, 2) {
GETTER(ExternalFloat32x4Array, Float32x4, simd_value_t);
GETTER(ExternalFloat32x4Array, Float32x4, simd128_value_t);
}
DEFINE_NATIVE_ENTRY(ExternalFloat32x4Array_setIndexed, 3) {
SETTER(ExternalFloat32x4Array, Float32x4, value, simd_value_t);
SETTER(ExternalFloat32x4Array, Float32x4, value, simd128_value_t);
}

View file

@ -72,6 +72,45 @@
#error Automatic target os detection failed.
#endif
// Portable 128-bit SIMD value made of four 32-bit lanes, stored as floats.
//
// The lanes can be loaded from / stored to float arrays, uint32_t arrays or
// another simd128_value_t. All array transfers copy the raw lane bytes, so
// integer lanes are preserved bit-for-bit (including bit patterns that would
// decode as NaNs) and no uint32_t storage is ever accessed through a float
// lvalue.
struct simd128_value_t {
  float storage[4];

  // Loads the four lanes from the float array 'v' and returns *this so the
  // call can be chained on a temporary.
  simd128_value_t& readFrom(const float* v) {
    CopyLanes(storage, v);
    return *this;
  }

  // Loads the four lanes from the raw 32-bit patterns in 'v'.
  simd128_value_t& readFrom(const uint32_t* v) {
    CopyLanes(storage, v);
    return *this;
  }

  // Copies all lanes from another value.
  simd128_value_t& readFrom(const simd128_value_t* v) {
    *this = *v;
    return *this;
  }

  // Stores the four lanes into the float array 'v'.
  void writeTo(float* v) const {
    CopyLanes(v, storage);
  }

  // Stores the raw 32-bit lane patterns into 'v'.
  void writeTo(uint32_t* v) const {
    CopyLanes(v, storage);
  }

  // Copies all lanes into another value.
  void writeTo(simd128_value_t* v) const {
    *v = *this;
  }

 private:
  // Copies exactly four 32-bit lanes (16 bytes) from 'src' to 'dst' one byte
  // at a time. Going through unsigned char keeps the copy free of aliasing
  // problems and avoids any floating point load/store of the lane contents.
  static void CopyLanes(void* dst, const void* src) {
    unsigned char* to = static_cast<unsigned char*>(dst);
    const unsigned char* from = static_cast<const unsigned char*>(src);
    for (unsigned int i = 0; i < 4 * sizeof(float); i++) {
      to[i] = from[i];
    }
  }
};
// Processor architecture detection. For more info on what's defined, see:
// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
// http://www.agner.org/optimize/calling_conventions.pdf
@ -79,47 +118,23 @@
#if defined(_M_X64) || defined(__x86_64__)
#define HOST_ARCH_X64 1
#define ARCH_IS_64_BIT 1
#include <xmmintrin.h> // NOLINT
#define kFpuRegisterSize 16
typedef __m128 fpu_register_t;
typedef __m128 simd_value_t;
// Unaligned load.
#define simd_value_safe_load(addr) \
_mm_loadu_ps(reinterpret_cast<const float*>(addr))
// Unaligned store.
#define simd_value_safe_store(addr, value) \
_mm_storeu_ps(reinterpret_cast<float*>(addr), value)
typedef simd128_value_t fpu_register_t;
#elif defined(_M_IX86) || defined(__i386__)
#define HOST_ARCH_IA32 1
#define ARCH_IS_32_BIT 1
#include <xmmintrin.h> // NOLINT
#define kFpuRegisterSize 16
typedef __m128 fpu_register_t;
typedef __m128 simd_value_t;
// Unaligned load.
#define simd_value_safe_load(addr) \
_mm_loadu_ps(reinterpret_cast<const float*>(addr))
// Unaligned store.
#define simd_value_safe_store(addr, value) \
_mm_storeu_ps(reinterpret_cast<float*>(addr), value)
typedef simd128_value_t fpu_register_t;
#elif defined(__ARMEL__)
#define HOST_ARCH_ARM 1
#define ARCH_IS_32_BIT 1
#define kFpuRegisterSize 8
typedef double fpu_register_t;
// TODO(johnmccutchan): ARM simd type.
typedef struct {
uint32_t data_[4];
} simd_value_t;
#elif defined(__MIPSEL__)
#define HOST_ARCH_MIPS 1
#define ARCH_IS_32_BIT 1
#define kFpuRegisterSize 8
typedef double fpu_register_t;
// TODO(johnmccutchan): MIPS simd type.
typedef struct {
uint32_t data_[4];
} simd_value_t;
#else
#error Architecture was not detected as supported by Dart.
#endif
@ -215,7 +230,7 @@ typedef uintptr_t uword;
const int kWordSize = sizeof(word);
const int kDoubleSize = sizeof(double); // NOLINT
const int kFloatSize = sizeof(float); // NOLINT
const int kSimd128Size = 16;
const int kSimd128Size = sizeof(simd128_value_t); // NOLINT
#ifdef ARCH_IS_32_BIT
const int kWordSizeLog2 = 2;
const uword kUwordMax = kMaxUint32;

View file

@ -1450,11 +1450,13 @@ void DeoptimizeIfOwner(const GrowableArray<intptr_t>& classes) {
// Copy saved registers into the isolate buffer.
static void CopySavedRegisters(uword saved_registers_address) {
double* fpu_registers_copy = new double[kNumberOfFpuRegisters];
fpu_register_t* fpu_registers_copy =
new fpu_register_t[kNumberOfFpuRegisters];
ASSERT(fpu_registers_copy != NULL);
for (intptr_t i = 0; i < kNumberOfFpuRegisters; i++) {
fpu_registers_copy[i] = *reinterpret_cast<double*>(saved_registers_address);
saved_registers_address += kDoubleSize;
fpu_registers_copy[i] =
*reinterpret_cast<fpu_register_t*>(saved_registers_address);
saved_registers_address += kFpuRegisterSize;
}
Isolate::Current()->set_deopt_fpu_registers_copy(fpu_registers_copy);
@ -1506,7 +1508,8 @@ DEFINE_LEAF_RUNTIME_ENTRY(intptr_t, DeoptimizeCopyFrame,
// All registers have been saved below last-fp.
const uword last_fp = saved_registers_address +
kNumberOfCpuRegisters * kWordSize + kNumberOfFpuRegisters * kDoubleSize;
kNumberOfCpuRegisters * kWordSize +
kNumberOfFpuRegisters * kFpuRegisterSize;
CopySavedRegisters(saved_registers_address);
// Get optimized code and frame that need to be deoptimized.
@ -1608,7 +1611,7 @@ DEFINE_LEAF_RUNTIME_ENTRY(intptr_t, DeoptimizeFillFrame, uword last_fp) {
intptr_t* frame_copy = isolate->deopt_frame_copy();
intptr_t* cpu_registers_copy = isolate->deopt_cpu_registers_copy();
double* fpu_registers_copy = isolate->deopt_fpu_registers_copy();
fpu_register_t* fpu_registers_copy = isolate->deopt_fpu_registers_copy();
intptr_t deopt_reason = kDeoptUnknown;
const DeoptInfo& deopt_info = DeoptInfo::Handle(
@ -1634,42 +1637,17 @@ END_LEAF_RUNTIME_ENTRY
// This is the last step in the deoptimization, GC can occur.
DEFINE_RUNTIME_ENTRY(DeoptimizeMaterializeDoubles, 0) {
DeferredDouble* deferred_double = Isolate::Current()->DetachDeferredDoubles();
DeferredObject* deferred_object = Isolate::Current()->DetachDeferredObjects();
while (deferred_double != NULL) {
DeferredDouble* current = deferred_double;
deferred_double = deferred_double->next();
while (deferred_object != NULL) {
DeferredObject* current = deferred_object;
deferred_object = deferred_object->next();
RawDouble** slot = current->slot();
*slot = Double::New(current->value());
if (FLAG_trace_deoptimization_verbose) {
OS::PrintErr("materializing double at %"Px": %g\n",
reinterpret_cast<uword>(current->slot()),
current->value());
}
current->Materialize();
delete current;
}
DeferredMint* deferred_mint = Isolate::Current()->DetachDeferredMints();
while (deferred_mint != NULL) {
DeferredMint* current = deferred_mint;
deferred_mint = deferred_mint->next();
RawMint** slot = current->slot();
ASSERT(!Smi::IsValid64(current->value()));
*slot = Mint::New(current->value());
if (FLAG_trace_deoptimization_verbose) {
OS::PrintErr("materializing mint at %"Px": %"Pd64"\n",
reinterpret_cast<uword>(current->slot()),
current->value());
}
delete current;
}
// Since this is the only step where GC can occur during deoptimization,
// use it to report the source line where deoptimization occurred.
if (FLAG_trace_deoptimization) {

View file

@ -170,6 +170,78 @@ class DeoptInt64StackSlotInstr : public DeoptInstr {
};
class DeoptFloat32x4StackSlotInstr : public DeoptInstr {
public:
explicit DeoptFloat32x4StackSlotInstr(intptr_t from_index)
: stack_slot_index_(from_index) {
ASSERT(stack_slot_index_ >= 0);
}
virtual intptr_t from_index() const { return stack_slot_index_; }
virtual DeoptInstr::Kind kind() const { return kFloat32x4StackSlot; }
virtual const char* ToCString() const {
const char* format = "f32x4s%"Pd"";
intptr_t len = OS::SNPrint(NULL, 0, format, stack_slot_index_);
char* chars = Isolate::Current()->current_zone()->Alloc<char>(len + 1);
OS::SNPrint(chars, len + 1, format, stack_slot_index_);
return chars;
}
void Execute(DeoptimizationContext* deopt_context, intptr_t to_index) {
intptr_t from_index =
deopt_context->from_frame_size() - stack_slot_index_ - 1;
simd128_value_t* from_addr = reinterpret_cast<simd128_value_t*>(
deopt_context->GetFromFrameAddressAt(from_index));
intptr_t* to_addr = deopt_context->GetToFrameAddressAt(to_index);
*reinterpret_cast<RawSmi**>(to_addr) = Smi::New(0);
Isolate::Current()->DeferFloat32x4Materialization(
*from_addr, reinterpret_cast<RawFloat32x4**>(to_addr));
}
private:
const intptr_t stack_slot_index_; // First argument is 0, always >= 0.
DISALLOW_COPY_AND_ASSIGN(DeoptFloat32x4StackSlotInstr);
};
class DeoptUint32x4StackSlotInstr : public DeoptInstr {
public:
explicit DeoptUint32x4StackSlotInstr(intptr_t from_index)
: stack_slot_index_(from_index) {
ASSERT(stack_slot_index_ >= 0);
}
virtual intptr_t from_index() const { return stack_slot_index_; }
virtual DeoptInstr::Kind kind() const { return kUint32x4StackSlot; }
virtual const char* ToCString() const {
const char* format = "ui32x4s%"Pd"";
intptr_t len = OS::SNPrint(NULL, 0, format, stack_slot_index_);
char* chars = Isolate::Current()->current_zone()->Alloc<char>(len + 1);
OS::SNPrint(chars, len + 1, format, stack_slot_index_);
return chars;
}
void Execute(DeoptimizationContext* deopt_context, intptr_t to_index) {
intptr_t from_index =
deopt_context->from_frame_size() - stack_slot_index_ - 1;
simd128_value_t* from_addr = reinterpret_cast<simd128_value_t*>(
deopt_context->GetFromFrameAddressAt(from_index));
intptr_t* to_addr = deopt_context->GetToFrameAddressAt(to_index);
*reinterpret_cast<RawSmi**>(to_addr) = Smi::New(0);
Isolate::Current()->DeferUint32x4Materialization(
*from_addr, reinterpret_cast<RawUint32x4**>(to_addr));
}
private:
const intptr_t stack_slot_index_; // First argument is 0, always >= 0.
DISALLOW_COPY_AND_ASSIGN(DeoptUint32x4StackSlotInstr);
};
// Deoptimization instruction creating return address using function and
// deopt-id stored at 'object_table_index'. Uses the deopt-after
// continuation point.
@ -413,6 +485,72 @@ class DeoptInt64FpuRegisterInstr: public DeoptInstr {
};
// Deoptimization instruction moving an XMM register.
class DeoptFloat32x4FpuRegisterInstr: public DeoptInstr {
public:
explicit DeoptFloat32x4FpuRegisterInstr(intptr_t reg_as_int)
: reg_(static_cast<FpuRegister>(reg_as_int)) {}
virtual intptr_t from_index() const { return static_cast<intptr_t>(reg_); }
virtual DeoptInstr::Kind kind() const { return kFloat32x4FpuRegister; }
virtual const char* ToCString() const {
const char* format = "%s(f32x4)";
intptr_t len =
OS::SNPrint(NULL, 0, format, Assembler::FpuRegisterName(reg_));
char* chars = Isolate::Current()->current_zone()->Alloc<char>(len + 1);
OS::SNPrint(chars, len + 1, format, Assembler::FpuRegisterName(reg_));
return chars;
}
void Execute(DeoptimizationContext* deopt_context, intptr_t to_index) {
simd128_value_t value = deopt_context->FpuRegisterValueAsSimd128(reg_);
intptr_t* to_addr = deopt_context->GetToFrameAddressAt(to_index);
*reinterpret_cast<RawSmi**>(to_addr) = Smi::New(0);
Isolate::Current()->DeferFloat32x4Materialization(
value, reinterpret_cast<RawFloat32x4**>(to_addr));
}
private:
const FpuRegister reg_;
DISALLOW_COPY_AND_ASSIGN(DeoptFloat32x4FpuRegisterInstr);
};
// Deoptimization instruction moving an XMM register that holds an unboxed
// Uint32x4 value. Executing it writes a Smi placeholder into the destination
// slot and asks the isolate to box the register contents as a Uint32x4 later.
class DeoptUint32x4FpuRegisterInstr: public DeoptInstr {
 public:
  explicit DeoptUint32x4FpuRegisterInstr(intptr_t reg_as_int)
      : reg_(static_cast<FpuRegister>(reg_as_int)) {}

  virtual intptr_t from_index() const { return static_cast<intptr_t>(reg_); }

  // Must be kUint32x4FpuRegister: DeoptInstr::Create() maps that kind back to
  // this class, whereas kFloat32x4FpuRegister recreates the Float32x4
  // instruction and would box the value with the wrong type.
  virtual DeoptInstr::Kind kind() const { return kUint32x4FpuRegister; }

  // Returns a zone-allocated description of the form "<register>(ui32x4)".
  // The "ui32x4" tag matches the one used by the Uint32x4 stack slot
  // instruction so traces distinguish it from Float32x4 registers.
  virtual const char* ToCString() const {
    const char* format = "%s(ui32x4)";
    // First pass only measures; the extra byte is for the terminator.
    intptr_t len =
        OS::SNPrint(NULL, 0, format, Assembler::FpuRegisterName(reg_));
    char* chars = Isolate::Current()->current_zone()->Alloc<char>(len + 1);
    OS::SNPrint(chars, len + 1, format, Assembler::FpuRegisterName(reg_));
    return chars;
  }

  void Execute(DeoptimizationContext* deopt_context, intptr_t to_index) {
    simd128_value_t value = deopt_context->FpuRegisterValueAsSimd128(reg_);
    intptr_t* to_addr = deopt_context->GetToFrameAddressAt(to_index);
    // Put a Smi in the slot until the deferred Uint32x4 is materialized.
    *reinterpret_cast<RawSmi**>(to_addr) = Smi::New(0);
    Isolate::Current()->DeferUint32x4Materialization(
        value, reinterpret_cast<RawUint32x4**>(to_addr));
  }

 private:
  const FpuRegister reg_;

  DISALLOW_COPY_AND_ASSIGN(DeoptUint32x4FpuRegisterInstr);
};
// Deoptimization instruction creating a PC marker for the code of
// function at 'object_table_index'.
class DeoptPcMarkerInstr : public DeoptInstr {
@ -591,12 +729,20 @@ DeoptInstr* DeoptInstr::Create(intptr_t kind_as_int, intptr_t from_index) {
case kStackSlot: return new DeoptStackSlotInstr(from_index);
case kDoubleStackSlot: return new DeoptDoubleStackSlotInstr(from_index);
case kInt64StackSlot: return new DeoptInt64StackSlotInstr(from_index);
case kFloat32x4StackSlot:
return new DeoptFloat32x4StackSlotInstr(from_index);
case kUint32x4StackSlot:
return new DeoptUint32x4StackSlotInstr(from_index);
case kRetAfterAddress: return new DeoptRetAfterAddressInstr(from_index);
case kRetBeforeAddress: return new DeoptRetBeforeAddressInstr(from_index);
case kConstant: return new DeoptConstantInstr(from_index);
case kRegister: return new DeoptRegisterInstr(from_index);
case kFpuRegister: return new DeoptFpuRegisterInstr(from_index);
case kInt64FpuRegister: return new DeoptInt64FpuRegisterInstr(from_index);
case kFloat32x4FpuRegister:
return new DeoptFloat32x4FpuRegisterInstr(from_index);
case kUint32x4FpuRegister:
return new DeoptUint32x4FpuRegisterInstr(from_index);
case kPcMarker: return new DeoptPcMarkerInstr(from_index);
case kCallerFp: return new DeoptCallerFpInstr();
case kCallerPc: return new DeoptCallerPcInstr();
@ -726,6 +872,7 @@ void DeoptInfoBuilder::AddCopy(const Location& from_loc,
UNREACHABLE();
}
ASSERT(to_index == instructions_.length());
ASSERT(deopt_instr != NULL);
instructions_.Add(deopt_instr);
}

View file

@ -53,11 +53,16 @@ class DeoptimizationContext : public ValueObject {
}
double FpuRegisterValue(FpuRegister reg) const {
return fpu_registers_copy_[reg];
return *reinterpret_cast<double*>(&fpu_registers_copy_[reg]);
}
int64_t FpuRegisterValueAsInt64(FpuRegister reg) const {
return (reinterpret_cast<int64_t*>(fpu_registers_copy_))[reg];
return *reinterpret_cast<int64_t*>(&fpu_registers_copy_[reg]);
}
// Returns the saved contents of FPU register 'reg' as a 128-bit SIMD value,
// read as four consecutive floats starting at the register's saved slot.
// NOTE(review): this reads 16 bytes from the slot; confirm it is only used on
// hosts where fpu_register_t is at least that wide.
simd128_value_t FpuRegisterValueAsSimd128(FpuRegister reg) const {
  const float* lanes = reinterpret_cast<float*>(&fpu_registers_copy_[reg]);
  simd128_value_t result;
  result.readFrom(lanes);
  return result;
}
Isolate* isolate() const { return isolate_; }
@ -73,7 +78,7 @@ class DeoptimizationContext : public ValueObject {
intptr_t* from_frame_;
intptr_t from_frame_size_;
intptr_t* registers_copy_;
double* fpu_registers_copy_;
fpu_register_t* fpu_registers_copy_;
const intptr_t num_args_;
const DeoptReasonId deopt_reason_;
intptr_t caller_fp_;
@ -96,9 +101,13 @@ class DeoptInstr : public ZoneAllocated {
kRegister,
kFpuRegister,
kInt64FpuRegister,
kFloat32x4FpuRegister,
kUint32x4FpuRegister,
kStackSlot,
kDoubleStackSlot,
kInt64StackSlot,
kFloat32x4StackSlot,
kUint32x4StackSlot,
kPcMarker,
kCallerFp,
kCallerPc,

View file

@ -1332,18 +1332,18 @@ void FlowGraphCompiler::SaveLiveRegisters(LocationSummary* locs) {
// TODO(vegorov): consider saving only caller save (volatile) registers.
const intptr_t xmm_regs_count = locs->live_registers()->fpu_regs_count();
if (xmm_regs_count > 0) {
__ subl(ESP, Immediate(xmm_regs_count * kDoubleSize));
__ subl(ESP, Immediate(xmm_regs_count * kFpuRegisterSize));
// Store XMM registers with the lowest register number at the lowest
// address.
intptr_t offset = 0;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsFpuRegister(xmm_reg)) {
__ movsd(Address(ESP, offset), xmm_reg);
offset += kDoubleSize;
__ movups(Address(ESP, offset), xmm_reg);
offset += kFpuRegisterSize;
}
}
ASSERT(offset == (xmm_regs_count * kDoubleSize));
ASSERT(offset == (xmm_regs_count * kFpuRegisterSize));
}
// Store general purpose registers with the highest register number at the
@ -1374,11 +1374,11 @@ void FlowGraphCompiler::RestoreLiveRegisters(LocationSummary* locs) {
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsFpuRegister(xmm_reg)) {
__ movsd(xmm_reg, Address(ESP, offset));
offset += kDoubleSize;
__ movups(xmm_reg, Address(ESP, offset));
offset += kFpuRegisterSize;
}
}
ASSERT(offset == (xmm_regs_count * kDoubleSize));
ASSERT(offset == (xmm_regs_count * kFpuRegisterSize));
__ addl(ESP, Immediate(offset));
}
}

View file

@ -1332,18 +1332,18 @@ void FlowGraphCompiler::SaveLiveRegisters(LocationSummary* locs) {
// TODO(vegorov): consider saving only caller save (volatile) registers.
const intptr_t xmm_regs_count = locs->live_registers()->fpu_regs_count();
if (xmm_regs_count > 0) {
__ subq(RSP, Immediate(xmm_regs_count * kDoubleSize));
__ subq(RSP, Immediate(xmm_regs_count * kFpuRegisterSize));
// Store XMM registers with the lowest register number at the lowest
// address.
intptr_t offset = 0;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsFpuRegister(xmm_reg)) {
__ movsd(Address(RSP, offset), xmm_reg);
offset += kDoubleSize;
__ movups(Address(RSP, offset), xmm_reg);
offset += kFpuRegisterSize;
}
}
ASSERT(offset == (xmm_regs_count * kDoubleSize));
ASSERT(offset == (xmm_regs_count * kFpuRegisterSize));
}
// Store general purpose registers with the highest register number at the
@ -1374,11 +1374,11 @@ void FlowGraphCompiler::RestoreLiveRegisters(LocationSummary* locs) {
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsFpuRegister(xmm_reg)) {
__ movsd(xmm_reg, Address(RSP, offset));
offset += kDoubleSize;
__ movups(xmm_reg, Address(RSP, offset));
offset += kFpuRegisterSize;
}
}
ASSERT(offset == (xmm_regs_count * kDoubleSize));
ASSERT(offset == (xmm_regs_count * kFpuRegisterSize));
__ addq(RSP, Immediate(offset));
}
}

View file

@ -31,7 +31,7 @@ DEFINE_FLAG(bool, report_usage_count, false,
"Track function usage and report.");
DEFINE_FLAG(bool, trace_isolates, false,
"Trace isolate creation and shut down.");
DECLARE_FLAG(bool, trace_deoptimization_verbose);
class IsolateMessageHandler : public MessageHandler {
public:
@ -255,6 +255,61 @@ void BaseIsolate::AssertCurrent(BaseIsolate* isolate) {
#endif
void DeferredDouble::Materialize() {
RawDouble** double_slot = reinterpret_cast<RawDouble**>(slot());
*double_slot = Double::New(value());
if (FLAG_trace_deoptimization_verbose) {
OS::PrintErr("materializing double at %"Px": %g\n",
reinterpret_cast<uword>(slot()), value());
}
}
void DeferredMint::Materialize() {
RawMint** mint_slot = reinterpret_cast<RawMint**>(slot());
ASSERT(!Smi::IsValid64(value()));
*mint_slot = Mint::New(value());
if (FLAG_trace_deoptimization_verbose) {
OS::PrintErr("materializing mint at %"Px": %"Pd64"\n",
reinterpret_cast<uword>(slot()), value());
}
}
void DeferredFloat32x4::Materialize() {
RawFloat32x4** float32x4_slot = reinterpret_cast<RawFloat32x4**>(slot());
RawFloat32x4* raw_float32x4 = Float32x4::New(value());
*float32x4_slot = raw_float32x4;
if (FLAG_trace_deoptimization_verbose) {
float x = raw_float32x4->x();
float y = raw_float32x4->y();
float z = raw_float32x4->z();
float w = raw_float32x4->w();
OS::PrintErr("materializing Float32x4 at %"Px": %g,%g,%g,%g\n",
reinterpret_cast<uword>(slot()), x, y, z, w);
}
}
void DeferredUint32x4::Materialize() {
RawUint32x4** uint32x4_slot = reinterpret_cast<RawUint32x4**>(slot());
RawUint32x4* raw_uint32x4 = Uint32x4::New(value());
*uint32x4_slot = raw_uint32x4;
if (FLAG_trace_deoptimization_verbose) {
uint32_t x = raw_uint32x4->x();
uint32_t y = raw_uint32x4->y();
uint32_t z = raw_uint32x4->z();
uint32_t w = raw_uint32x4->w();
OS::PrintErr("materializing Uint32x4 at %"Px": %x,%x,%x,%x\n",
reinterpret_cast<uword>(slot()), x, y, z, w);
}
}
Isolate::Isolate()
: store_buffer_block_(),
store_buffer_(),
@ -287,8 +342,7 @@ Isolate::Isolate()
deopt_fpu_registers_copy_(NULL),
deopt_frame_copy_(NULL),
deopt_frame_copy_size_(0),
deferred_doubles_(NULL),
deferred_mints_(NULL) {
deferred_objects_(NULL) {
}

View file

@ -30,6 +30,7 @@ class MessageHandler;
class Mutex;
class ObjectPointerVisitor;
class ObjectStore;
class RawInstance;
class RawArray;
class RawContext;
class RawDouble;
@ -40,47 +41,98 @@ class Simulator;
class StackResource;
class StackZone;
class StubCode;
class RawFloat32x4;
class RawUint32x4;
// Used by the deoptimization infrastructure to defer allocation of Double
// Used by the deoptimization infrastructure to defer allocation of unboxed
// objects until frame is fully rewritten and GC is safe.
// See callers of Isolate::DeferDoubleMaterialization.
class DeferredDouble {
// See callers of Isolate::DeferObjectMaterialization.
class DeferredObject {
public:
DeferredDouble(double value, RawDouble** slot, DeferredDouble* next)
: value_(value), slot_(slot), next_(next) { }
DeferredObject(RawInstance** slot, DeferredObject* next)
: slot_(slot), next_(next) { }
virtual ~DeferredObject() { }
RawInstance** slot() const { return slot_; }
DeferredObject* next() const { return next_; }
virtual void Materialize() = 0;
private:
RawInstance** const slot_;
DeferredObject* const next_;
DISALLOW_COPY_AND_ASSIGN(DeferredObject);
};
class DeferredDouble : public DeferredObject {
public:
DeferredDouble(double value, RawInstance** slot, DeferredObject* next)
: DeferredObject(slot, next), value_(value) { }
virtual void Materialize();
double value() const { return value_; }
RawDouble** slot() const { return slot_; }
DeferredDouble* next() const { return next_; }
private:
const double value_;
RawDouble** const slot_;
DeferredDouble* const next_;
DISALLOW_COPY_AND_ASSIGN(DeferredDouble);
};
class DeferredMint {
class DeferredMint : public DeferredObject {
public:
DeferredMint(int64_t value, RawMint** slot, DeferredMint* next)
: value_(value), slot_(slot), next_(next) { }
DeferredMint(int64_t value, RawInstance** slot, DeferredObject* next)
: DeferredObject(slot, next), value_(value) { }
virtual void Materialize();
int64_t value() const { return value_; }
RawMint** slot() const { return slot_; }
DeferredMint* next() const { return next_; }
private:
const int64_t value_;
RawMint** const slot_;
DeferredMint* const next_;
DISALLOW_COPY_AND_ASSIGN(DeferredMint);
};
// Deferred object that remembers an unboxed 128-bit value together with the
// slot it belongs to; Materialize() is declared here and defined out of line.
class DeferredFloat32x4 : public DeferredObject {
 public:
  DeferredFloat32x4(simd128_value_t value,
                    RawInstance** slot,
                    DeferredObject* next)
      : DeferredObject(slot, next),
        value_(value) { }

  // The captured SIMD value, returned by copy.
  simd128_value_t value() const { return value_; }

  virtual void Materialize();

 private:
  const simd128_value_t value_;

  DISALLOW_COPY_AND_ASSIGN(DeferredFloat32x4);
};
// Deferred object that remembers an unboxed 128-bit value together with the
// slot it belongs to; Materialize() is declared here and defined out of line.
class DeferredUint32x4 : public DeferredObject {
 public:
  DeferredUint32x4(simd128_value_t value,
                   RawInstance** slot,
                   DeferredObject* next)
      : DeferredObject(slot, next),
        value_(value) { }

  // The captured SIMD value, returned by copy.
  simd128_value_t value() const { return value_; }

  virtual void Materialize();

 private:
  const simd128_value_t value_;

  DISALLOW_COPY_AND_ASSIGN(DeferredUint32x4);
};
class Isolate : public BaseIsolate {
public:
~Isolate();
@ -308,10 +360,10 @@ class Isolate : public BaseIsolate {
ASSERT((value == NULL) || (deopt_cpu_registers_copy_ == NULL));
deopt_cpu_registers_copy_ = value;
}
double* deopt_fpu_registers_copy() const {
fpu_register_t* deopt_fpu_registers_copy() const {
return deopt_fpu_registers_copy_;
}
void set_deopt_fpu_registers_copy(double* value) {
void set_deopt_fpu_registers_copy(fpu_register_t* value) {
ASSERT((value == NULL) || (deopt_fpu_registers_copy_ == NULL));
deopt_fpu_registers_copy_ = value;
}
@ -325,22 +377,37 @@ class Isolate : public BaseIsolate {
intptr_t deopt_frame_copy_size() const { return deopt_frame_copy_size_; }
void DeferDoubleMaterialization(double value, RawDouble** slot) {
deferred_doubles_ = new DeferredDouble(value, slot, deferred_doubles_);
deferred_objects_ = new DeferredDouble(
value,
reinterpret_cast<RawInstance**>(slot),
deferred_objects_);
}
void DeferMintMaterialization(int64_t value, RawMint** slot) {
deferred_mints_ = new DeferredMint(value, slot, deferred_mints_);
deferred_objects_ = new DeferredMint(value,
reinterpret_cast<RawInstance**>(slot),
deferred_objects_);
}
DeferredDouble* DetachDeferredDoubles() {
DeferredDouble* list = deferred_doubles_;
deferred_doubles_ = NULL;
return list;
// Records that a Float32x4 holding 'value' must later be stored into 'slot'.
// The new entry is pushed at the head of the deferred object list.
void DeferFloat32x4Materialization(simd128_value_t value,
                                   RawFloat32x4** slot) {
  RawInstance** instance_slot = reinterpret_cast<RawInstance**>(slot);
  DeferredObject* const previous_head = deferred_objects_;
  deferred_objects_ =
      new DeferredFloat32x4(value, instance_slot, previous_head);
}
DeferredMint* DetachDeferredMints() {
DeferredMint* list = deferred_mints_;
deferred_mints_ = NULL;
// Records that a Uint32x4 holding 'value' must later be stored into 'slot'.
// The new entry is pushed at the head of the deferred object list.
void DeferUint32x4Materialization(simd128_value_t value,
                                  RawUint32x4** slot) {
  RawInstance** instance_slot = reinterpret_cast<RawInstance**>(slot);
  DeferredObject* const previous_head = deferred_objects_;
  deferred_objects_ =
      new DeferredUint32x4(value, instance_slot, previous_head);
}
// Hands the current list of deferred objects to the caller and leaves the
// isolate's list empty.
DeferredObject* DetachDeferredObjects() {
  DeferredObject* const detached_head = deferred_objects_;
  deferred_objects_ = NULL;
  return detached_head;
}
@ -385,11 +452,10 @@ class Isolate : public BaseIsolate {
// Deoptimization support.
intptr_t* deopt_cpu_registers_copy_;
double* deopt_fpu_registers_copy_;
fpu_register_t* deopt_fpu_registers_copy_;
intptr_t* deopt_frame_copy_;
intptr_t deopt_frame_copy_size_;
DeferredDouble* deferred_doubles_;
DeferredMint* deferred_mints_;
DeferredObject* deferred_objects_;
static Dart_IsolateCreateCallback create_callback_;
static Dart_IsolateInterruptCallback interrupt_callback_;

View file

@ -105,6 +105,8 @@ const char* Location::Name() const {
case kFpuRegister: return Assembler::FpuRegisterName(fpu_reg());
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
case kFloat32x4StackSlot: return "F32x4S";
case kUint32x4StackSlot: return "UI32x4S";
case kUnallocated:
switch (policy()) {
case kAny:

View file

@ -21,6 +21,8 @@ enum Representation {
kUntagged,
kUnboxedDouble,
kUnboxedMint,
kUnboxedFloat32x4,
kUnboxedUint32x4,
kNumRepresentations
};
@ -38,7 +40,7 @@ class Location : public ValueObject {
private:
enum {
// Number of bits required to encode Kind value.
kBitsForKind = 3,
kBitsForKind = 4,
kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind,
};
@ -69,6 +71,8 @@ class Location : public ValueObject {
// a spill index.
kStackSlot = 3,
kDoubleStackSlot = 4,
kFloat32x4StackSlot = 8,
kUint32x4StackSlot = 10,
// Register location represents a fixed register. Payload contains
// register code.
@ -187,6 +191,7 @@ class Location : public ValueObject {
return RegisterField::decode(payload());
}
Representation representation() const {
return RepresentationField::decode(payload());
}
@ -231,14 +236,19 @@ class Location : public ValueObject {
return static_cast<intptr_t>(RegisterField::decode(payload()));
}
// Builds the payload shared by all stack slot locations: the stack index,
// biased by kStackIndexBias, combined with the representation bits.
static uword make_stack_index_payload(intptr_t stack_index,
                                      Representation rep) {
  ASSERT((stack_index >= -kStackIndexBias) &&
         (stack_index < kStackIndexBias));
  const uword biased_index =
      static_cast<uword>(kStackIndexBias + stack_index);
  const uword index_bits = IndexField::encode(biased_index);
  return index_bits | RepresentationField::encode(rep);
}
// Spill slots.
static Location StackSlot(intptr_t stack_index,
Representation rep = kTagged) {
ASSERT((-kStackIndexBias <= stack_index) &&
(stack_index < kStackIndexBias));
uword payload =
IndexField::encode(static_cast<uword>(kStackIndexBias + stack_index))
| RepresentationField::encode(rep);
uword payload = make_stack_index_payload(stack_index, rep);
Location loc(kStackSlot, payload);
// Ensure that sign is preserved.
ASSERT(loc.stack_index() == stack_index);
@ -250,11 +260,7 @@ class Location : public ValueObject {
}
static Location DoubleStackSlot(intptr_t stack_index, Representation rep) {
ASSERT((-kStackIndexBias <= stack_index) &&
(stack_index < kStackIndexBias));
uword payload =
IndexField::encode(static_cast<uword>(kStackIndexBias + stack_index))
| RepresentationField::encode(rep);
uword payload = make_stack_index_payload(stack_index, rep);
Location loc(kDoubleStackSlot, payload);
// Ensure that sign is preserved.
ASSERT(loc.stack_index() == stack_index);
@ -265,9 +271,34 @@ class Location : public ValueObject {
return kind() == kDoubleStackSlot;
}
// Creates a location of kind kFloat32x4StackSlot for the given stack index
// and representation.
static Location Float32x4StackSlot(intptr_t stack_index, Representation rep) {
  Location result(kFloat32x4StackSlot,
                  make_stack_index_payload(stack_index, rep));
  // The index must survive the encode/decode round trip, sign included.
  ASSERT(result.stack_index() == stack_index);
  return result;
}

// True when this location is a Float32x4 stack slot.
bool IsFloat32x4StackSlot() const {
  return kFloat32x4StackSlot == kind();
}
// Creates a location of kind kUint32x4StackSlot for the given stack index
// and representation.
static Location Uint32x4StackSlot(intptr_t stack_index, Representation rep) {
  Location result(kUint32x4StackSlot,
                  make_stack_index_payload(stack_index, rep));
  // The index must survive the encode/decode round trip, sign included.
  ASSERT(result.stack_index() == stack_index);
  return result;
}

// True when this location is a Uint32x4 stack slot.
bool IsUint32x4StackSlot() const {
  return kUint32x4StackSlot == kind();
}
intptr_t stack_index() const {
ASSERT(IsStackSlot() || IsDoubleStackSlot());
ASSERT(IsStackSlot() || IsDoubleStackSlot() || IsFloat32x4StackSlot() ||
IsUint32x4StackSlot());
// Decode stack index manually to preserve sign.
return IndexField::decode(payload()) - kStackIndexBias;
}

View file

@ -12333,7 +12333,7 @@ const char* GrowableObjectArray::ToCString() const {
RawFloat32x4* Float32x4::New(float v0, float v1, float v2, float v3,
Heap::Space space) {
Heap::Space space) {
ASSERT(Isolate::Current()->object_store()->float32x4_class() !=
Class::null());
Float32x4& result = Float32x4::Handle();
@ -12352,7 +12352,7 @@ RawFloat32x4* Float32x4::New(float v0, float v1, float v2, float v3,
}
RawFloat32x4* Float32x4::New(simd_value_t value, Heap::Space space) {
RawFloat32x4* Float32x4::New(simd128_value_t value, Heap::Space space) {
ASSERT(Isolate::Current()->object_store()->float32x4_class() !=
Class::null());
Float32x4& result = Float32x4::Handle();
@ -12368,13 +12368,13 @@ ASSERT(Isolate::Current()->object_store()->float32x4_class() !=
}
simd_value_t Float32x4::value() const {
return simd_value_safe_load(&raw_ptr()->value_[0]);
simd128_value_t Float32x4::value() const {
return simd128_value_t().readFrom(&raw_ptr()->value_[0]);
}
void Float32x4::set_value(simd_value_t value) const {
simd_value_safe_store(&raw_ptr()->value_[0], value);
void Float32x4::set_value(simd128_value_t value) const {
value.writeTo(&raw_ptr()->value_[0]);
}
@ -12452,7 +12452,7 @@ RawUint32x4* Uint32x4::New(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3,
}
RawUint32x4* Uint32x4::New(simd_value_t value, Heap::Space space) {
RawUint32x4* Uint32x4::New(simd128_value_t value, Heap::Space space) {
ASSERT(Isolate::Current()->object_store()->float32x4_class() !=
Class::null());
Uint32x4& result = Uint32x4::Handle();
@ -12508,13 +12508,13 @@ uint32_t Uint32x4::w() const {
}
simd_value_t Uint32x4::value() const {
return simd_value_safe_load(&raw_ptr()->value_[0]);
simd128_value_t Uint32x4::value() const {
return simd128_value_t().readFrom(&raw_ptr()->value_[0]);
}
// Uint32x4::set_value(value): stores `value` into this object's value_,
// starting at value_[0].
// NOTE(review): two signature/body pairs are listed with a single closing
// brace -- presumably the pre-change form (simd_value_safe_store) followed by
// its replacement (value.writeTo); confirm which is current.
void Uint32x4::set_value(simd_value_t value) const {
simd_value_safe_store(&raw_ptr()->value_[0], value);
void Uint32x4::set_value(simd128_value_t value) const {
value.writeTo(&raw_ptr()->value_[0]);
}
@ -12984,7 +12984,7 @@ const char* Uint64Array::ToCString() const {
// Float32x4Array::New(len, space): only the opening of the definition is
// visible; the NewImpl call is cut off in this excerpt.
// NOTE(review): the `Heap::Space space) {` line is listed twice with the same
// text -- presumably the before/after pair of a whitespace-only change.
RawFloat32x4Array* Float32x4Array::New(intptr_t len,
Heap::Space space) {
Heap::Space space) {
// The Float32x4Array class must already be registered in the object store.
ASSERT(Isolate::Current()->object_store()->float32x4_array_class() !=
Class::null());
return NewImpl<Float32x4Array, RawFloat32x4Array>(kClassId, len,
@ -12992,9 +12992,9 @@ RawFloat32x4Array* Float32x4Array::New(intptr_t len,
}
// Float32x4Array::New(data, len, space): forwards kClassId and `data` to
// NewImpl; the remainder of that call is cut off in this excerpt.
// NOTE(review): two signatures are listed back to back (simd_value_t, then
// simd128_value_t) -- presumably the old and new side of a type rename.
RawFloat32x4Array* Float32x4Array::New(const simd_value_t* data,
intptr_t len,
Heap::Space space) {
RawFloat32x4Array* Float32x4Array::New(const simd128_value_t* data,
intptr_t len,
Heap::Space space) {
// Check the class this function actually instantiates. The previous check
// used float32_array_class(), which belongs to a different array type; the
// length-only overload of New already checks float32x4_array_class().
ASSERT(Isolate::Current()->object_store()->float32x4_array_class() !=
Class::null());
return NewImpl<Float32x4Array, RawFloat32x4Array>(kClassId, data,
@ -13187,16 +13187,15 @@ const char* ExternalUint64Array::ToCString() const {
}
// ExternalFloat32x4Array::New(data, len, space): checks that the external
// Float32x4 array class is registered, then forwards kClassId, data, len and
// space to NewExternalImpl.
// NOTE(review): the signature is listed twice (simd_value_t*, then
// simd128_value_t*) and the trailing `space);` line is duplicated --
// presumably old/new sides of a rename plus re-indentation; confirm which is
// current.
RawExternalFloat32x4Array* ExternalFloat32x4Array::New(
simd_value_t* data,
intptr_t len,
Heap::Space space) {
RawExternalFloat32x4Array* ExternalFloat32x4Array::New(simd128_value_t* data,
intptr_t len,
Heap::Space space) {
RawClass* cls =
Isolate::Current()->object_store()->external_float32x4_array_class();
// Note: cls is only used by this ASSERT within the visible code.
ASSERT(cls != Class::null());
return NewExternalImpl<ExternalFloat32x4Array,
RawExternalFloat32x4Array>(kClassId, data, len,
space);
space);
}

View file

@ -4944,7 +4944,8 @@ class Float32x4 : public Instance {
// Public interface of Float32x4 (excerpt from the class body).
public:
// Builds a Float32x4 from four separate float lanes; allocates in new space
// unless another Heap::Space is given.
static RawFloat32x4* New(float value0, float value1, float value2,
float value3, Heap::Space space = Heap::kNew);
// Builds a Float32x4 from one packed SIMD value.
// NOTE(review): declared twice (simd_value_t, then simd128_value_t) --
// presumably old and new side of a type rename; confirm which is current.
static RawFloat32x4* New(simd_value_t value, Heap::Space space = Heap::kNew);
static RawFloat32x4* New(simd128_value_t value,
Heap::Space space = Heap::kNew);
// Per-lane getters (further lanes are outside this excerpt).
float x() const;
float y() const;
@ -4956,8 +4957,8 @@ class Float32x4 : public Instance {
// Per-lane setters (excerpt; earlier lanes are outside this view).
void set_z(float z) const;
void set_w(float w) const;
// Whole-value accessors: read or write all lanes as one SIMD value.
// NOTE(review): each is declared twice (simd_value_t, then simd128_value_t)
// -- presumably old and new side of a type rename; confirm which is current.
simd_value_t value() const;
void set_value(simd_value_t value) const;
simd128_value_t value() const;
void set_value(simd128_value_t value) const;
static intptr_t InstanceSize() {
return RoundedAllocationSize(sizeof(RawFloat32x4));
@ -4977,7 +4978,8 @@ class Uint32x4 : public Instance {
// Public interface of Uint32x4 (excerpt from the class body).
public:
// Builds a Uint32x4 from four separate uint32_t lanes; allocates in new space
// unless another Heap::Space is given.
static RawUint32x4* New(uint32_t value0, uint32_t value1, uint32_t value2,
uint32_t value3, Heap::Space space = Heap::kNew);
// Builds a Uint32x4 from one packed SIMD value.
// NOTE(review): declared twice (simd_value_t, then simd128_value_t) --
// presumably old and new side of a type rename; confirm which is current.
static RawUint32x4* New(simd_value_t value, Heap::Space space = Heap::kNew);
static RawUint32x4* New(simd128_value_t value,
Heap::Space space = Heap::kNew);
// Per-lane getters (further lanes are outside this excerpt).
uint32_t x() const;
uint32_t y() const;
@ -4989,8 +4991,8 @@ class Uint32x4 : public Instance {
// Per-lane setters (excerpt; earlier lanes are outside this view).
void set_z(uint32_t z) const;
void set_w(uint32_t w) const;
// Whole-value accessors: read or write all lanes as one SIMD value.
// NOTE(review): each is declared twice (simd_value_t, then simd128_value_t)
// -- presumably old and new side of a type rename; confirm which is current.
simd_value_t value() const;
void set_value(simd_value_t value) const;
simd128_value_t value() const;
void set_value(simd128_value_t value) const;
static intptr_t InstanceSize() {
return RoundedAllocationSize(sizeof(RawUint32x4));
@ -5743,16 +5745,14 @@ class Float32x4Array : public ByteArray {
return Length() * kBytesPerElement;
}
// At(index): returns element `index` of the inline data_ array. The index is
// range-checked by ASSERT only.
// NOTE(review): two signatures and two return paths are listed in one body --
// presumably the old form (pointer + simd_value_safe_load) followed by the
// new form (direct data_[index] read); confirm which is current.
simd_value_t At(intptr_t index) const {
simd128_value_t At(intptr_t index) const {
ASSERT((index >= 0) && (index < Length()));
simd_value_t* load_ptr = &raw_ptr()->data_[index];
return simd_value_safe_load(load_ptr);
return raw_ptr()->data_[index];
}
// SetAt(index, value): writes `value` to element `index` of the inline data_
// array. The index is range-checked by ASSERT only.
// NOTE(review): two signatures and two store paths are listed in one body --
// presumably the old form (pointer + simd_value_safe_store) followed by the
// new form (direct data_[index] assignment); confirm which is current.
void SetAt(intptr_t index, simd_value_t value) const {
void SetAt(intptr_t index, simd128_value_t value) const {
ASSERT((index >= 0) && (index < Length()));
simd_value_t* store_ptr = &raw_ptr()->data_[index];
simd_value_safe_store(store_ptr, value);
raw_ptr()->data_[index] = value;
}
static const intptr_t kBytesPerElement = 16;
@ -5776,7 +5776,7 @@ class Float32x4Array : public ByteArray {
// Creates a Float32x4Array with `len` elements; allocates in new space unless
// another Heap::Space is given.
static RawFloat32x4Array* New(intptr_t len,
Heap::Space space = Heap::kNew);
// Overload that additionally takes a pointer to source element data.
// NOTE(review): the first line of this declaration is listed twice
// (simd_value_t, then simd128_value_t) -- presumably old and new side of a
// type rename; confirm which is current.
static RawFloat32x4Array* New(const simd_value_t* data,
static RawFloat32x4Array* New(const simd128_value_t* data,
intptr_t len,
Heap::Space space = Heap::kNew);
@ -6397,20 +6397,18 @@ class ExternalFloat32x4Array : public ByteArray {
return Length() * kBytesPerElement;
}
// At(index): returns element `index` read through the external data_ pointer.
// The index is range-checked by ASSERT only.
// NOTE(review): two signatures and two return paths are listed in one body --
// presumably the old form (pointer + simd_value_safe_load) followed by the
// new form (direct data_[index] read); confirm which is current.
simd_value_t At(intptr_t index) const {
simd128_value_t At(intptr_t index) const {
ASSERT((index >= 0) && (index < Length()));
simd_value_t* load_ptr = &raw_ptr()->data_[index];
return simd_value_safe_load(load_ptr);
return raw_ptr()->data_[index];
}
// SetAt(index, value): writes `value` to element `index` through the external
// data_ pointer. The index is range-checked by ASSERT only.
// NOTE(review): two signatures and two store paths are listed in one body --
// presumably the old form (pointer + simd_value_safe_store) followed by the
// new form (direct data_[index] assignment); confirm which is current.
void SetAt(intptr_t index, simd_value_t value) const {
void SetAt(intptr_t index, simd128_value_t value) const {
ASSERT((index >= 0) && (index < Length()));
simd_value_t* store_ptr = &raw_ptr()->data_[index];
simd_value_safe_store(store_ptr, value);
raw_ptr()->data_[index] = value;
}
// GetData(): returns the raw data_ pointer held by the underlying raw object.
// NOTE(review): the signature is listed twice (simd_value_t*, then
// simd128_value_t*) -- presumably old and new side of a type rename.
simd_value_t* GetData() const {
simd128_value_t* GetData() const {
return raw_ptr()->data_;
}
@ -6428,9 +6426,8 @@ class ExternalFloat32x4Array : public ByteArray {
return RoundedAllocationSize(sizeof(RawExternalFloat32x4Array));
}
// Creates an ExternalFloat32x4Array over caller-supplied `data` of `len`
// elements; allocates the object in new space unless another Heap::Space is
// given.
// NOTE(review): declared twice (simd_value_t*, then simd128_value_t*) --
// presumably old and new side of a type rename; confirm which is current.
static RawExternalFloat32x4Array* New(simd_value_t* data,
intptr_t len,
Heap::Space space = Heap::kNew);
static RawExternalFloat32x4Array* New(simd128_value_t* data, intptr_t len,
Heap::Space space = Heap::kNew);
private:
uint8_t* ByteAddr(intptr_t byte_offset) const {
@ -6439,7 +6436,7 @@ class ExternalFloat32x4Array : public ByteArray {
return data + byte_offset;
}
// SetData(data): stores `data` as the raw object's data_ pointer.
// NOTE(review): the signature is listed twice (simd_value_t*, then
// simd128_value_t*) -- presumably old and new side of a type rename.
void SetData(simd_value_t* data) const {
void SetData(simd128_value_t* data) const {
raw_ptr()->data_ = data;
}

View file

@ -1389,6 +1389,11 @@ class RawFloat32x4 : public RawInstance {
// Tail of RawFloat32x4: storage for the four float lanes.
float value_[4];
friend class SnapshotReader;
public:
// Lane accessors: x, y, z, w read value_[0], value_[1], value_[2], value_[3].
float x() const { return value_[0]; }
float y() const { return value_[1]; }
float z() const { return value_[2]; }
float w() const { return value_[3]; }
};
@ -1398,6 +1403,11 @@ class RawUint32x4 : public RawInstance {
// Tail of RawUint32x4: storage for the four uint32_t lanes.
uint32_t value_[4];
friend class SnapshotReader;
public:
// Lane accessors: x, y, z, w read value_[0], value_[1], value_[2], value_[3].
uint32_t x() const { return value_[0]; }
uint32_t y() const { return value_[1]; }
uint32_t z() const { return value_[2]; }
uint32_t w() const { return value_[3]; }
};
@ -1531,7 +1541,7 @@ class RawFloat32x4Array : public RawByteArray {
// Tail of RawFloat32x4Array.
RAW_HEAP_OBJECT_IMPLEMENTATION(Float32x4Array);
// Variable length data follows here.
// NOTE(review): data_ is declared twice (simd_value_t, then simd128_value_t)
// -- presumably old and new side of a type rename; only one can exist in the
// real class.
simd_value_t data_[0];
simd128_value_t data_[0];
};
class RawFloat32Array : public RawByteArray {
@ -1626,7 +1636,7 @@ class RawExternalUint64Array : public RawByteArray {
// Raw object layout for ExternalFloat32x4Array: holds a pointer to the
// element storage instead of inline data, plus an opaque peer pointer.
class RawExternalFloat32x4Array : public RawByteArray {
RAW_HEAP_OBJECT_IMPLEMENTATION(ExternalFloat32x4Array);
// NOTE(review): data_ is declared twice (simd_value_t*, then
// simd128_value_t*) -- presumably old and new side of a type rename; only one
// can exist in the real class.
simd_value_t* data_;
simd128_value_t* data_;
// Presumably the peer handed to the finalizer -- TODO confirm against callers.
void* peer_;
};

View file

@ -2265,13 +2265,13 @@ RawFloat32x4Array* Float32x4Array::ReadFrom(SnapshotReader* reader,
// Tail of Float32x4Array::ReadFrom: applies the tags, then fills each element
// from four consecutive floats read from the snapshot reader.
result.set_tags(tags);
// Setup the array elements.
// NOTE(review): `v` is declared twice and each loop iteration lists two
// read/store sequences -- presumably the old form (float v[4] +
// simd_value_safe_load) followed by the new form (simd128_value_t v with
// v.storage[]); confirm which is current.
float v[4];
simd128_value_t v;
for (intptr_t i = 0; i < len; ++i) {
v[0] = reader->Read<float>();
v[1] = reader->Read<float>();
v[2] = reader->Read<float>();
v[3] = reader->Read<float>();
result.SetAt(i, simd_value_safe_load(&v[0]));
// Lanes are read in storage order 0..3 before the element is stored.
v.storage[0] = reader->Read<float>();
v.storage[1] = reader->Read<float>();
v.storage[2] = reader->Read<float>();
v.storage[3] = reader->Read<float>();
result.SetAt(i, v);
}
return result.raw();
}
@ -2297,7 +2297,7 @@ RawExternal##name##Array* External##name##Array::ReadFrom( \
} \
BYTEARRAY_TYPE_LIST(EXTERNALARRAY_READ_FROM)
// Float32x4 is instantiated explicitly here in addition to the list above.
// NOTE(review): the instantiation is listed twice (simd_value_t, then
// simd128_value_t) -- presumably old and new side of a type rename; two live
// instantiations would define the same function twice.
EXTERNALARRAY_READ_FROM(Float32x4, Float32x4, simd_value_t)
EXTERNALARRAY_READ_FROM(Float32x4, Float32x4, simd128_value_t)
#undef EXTERNALARRAY_READ_FROM

View file

@ -345,12 +345,12 @@ static void GenerateDeoptimizationSequence(Assembler* assembler,
// ia32 excerpt of GenerateDeoptimizationSequence: saves all CPU registers,
// then all XMM registers, on the stack.
// Push CPU registers from the highest index down to 0.
for (intptr_t i = kNumberOfCpuRegisters - 1; i >= 0; i--) {
__ pushl(static_cast<Register>(i));
}
// Reserve stack space for the XMM registers.
// NOTE(review): two reservations and two store/advance pairs are listed
// (kDoubleSize + movsd, then kFpuRegisterSize + movups) -- presumably the old
// and new side of a diff; emitting both would reserve and advance twice.
// Confirm which is current.
__ subl(ESP, Immediate(kNumberOfXmmRegisters * kDoubleSize));
__ subl(ESP, Immediate(kNumberOfXmmRegisters * kFpuRegisterSize));
intptr_t offset = 0;
// Store each XMM register at increasing offsets from ESP.
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
__ movsd(Address(ESP, offset), xmm_reg);
offset += kDoubleSize;
__ movups(Address(ESP, offset), xmm_reg);
offset += kFpuRegisterSize;
}
__ movl(ECX, ESP); // ECX = address of the saved registers block.

View file

@ -339,12 +339,12 @@ static void GenerateDeoptimizationSequence(Assembler* assembler,
// x64 excerpt of GenerateDeoptimizationSequence: saves all CPU registers,
// then all XMM registers, on the stack.
// Push CPU registers from the highest index down to 0.
for (intptr_t i = kNumberOfCpuRegisters - 1; i >= 0; i--) {
__ pushq(static_cast<Register>(i));
}
// Reserve stack space for the XMM registers.
// NOTE(review): two reservations and two store/advance pairs are listed
// (kDoubleSize + movsd, then kFpuRegisterSize + movups) -- presumably the old
// and new side of a diff; emitting both would reserve and advance twice.
// Confirm which is current.
__ subq(RSP, Immediate(kNumberOfXmmRegisters * kDoubleSize));
__ subq(RSP, Immediate(kNumberOfXmmRegisters * kFpuRegisterSize));
intptr_t offset = 0;
// Store each XMM register at increasing offsets from RSP.
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
__ movsd(Address(RSP, offset), xmm_reg);
offset += kDoubleSize;
__ movups(Address(RSP, offset), xmm_reg);
offset += kFpuRegisterSize;
}
__ movq(RCX, RSP); // RCX = address of the saved registers block.