Add support for XMM registers in the SSA code generation pipeline.

Split BinaryDoubleOp into several instructions that manipulate unboxed doubles.
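
With --use_unboxed_doubles (on by default in this CL), a double-double instance call such as `a + b` is rewritten by FlowGraphOptimizer::TryReplaceWithBinaryOp into a chain of the new computations instead of a single BinaryDoubleOp. A rough IL-level sketch (v0..v4 are illustrative SSA temps):

    CheckEitherNonSmi(v0, v1)               // deoptimize if both operands are smis
    v2 <- UnboxDouble(v0)                   // raw double, lives in an XMM register
    v3 <- UnboxDouble(v1)
    v4 <- UnboxedDoubleBinaryOp(+, v2, v3)
    result <- BoxDouble(v4)                 // only if the result is used; allocates a Double

The boxed BinaryDoubleOp path is kept as a fallback when the flag is off.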

R=fschneider@google.com
BUG=

Review URL: https://chromiumcodereview.appspot.com//10875030

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@11313 260f80e4-7a28-3924-810f-c04153c831b5
vegorov@google.com 2012-08-24 14:45:42 +00:00
parent ea0037ddc4
commit 264c4f6d1a
29 changed files with 1257 additions and 209 deletions

View file

@ -161,6 +161,7 @@ typedef uintptr_t uword;
// Byte sizes.
const int kWordSize = sizeof(word);
const int kDoubleSize = sizeof(double); // NOLINT
#ifdef ARCH_IS_32_BIT
const int kWordSizeLog2 = 2;
const uword kUwordMax = kMaxUint32;

View file

@ -426,6 +426,14 @@ void Assembler::movsd(XmmRegister dst, XmmRegister src) {
}
void Assembler::movaps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x28);
EmitXmmRegisterOperand(dst, src);
}
void Assembler::addsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
@ -1790,6 +1798,17 @@ const char* Assembler::RegisterName(Register reg) {
}
static const char* xmm_reg_names[kNumberOfXmmRegisters] = {
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
};
const char* Assembler::XmmRegisterName(XmmRegister reg) {
ASSERT((0 <= reg) && (reg < kNumberOfXmmRegisters));
return xmm_reg_names[reg];
}
} // namespace dart
#endif // defined TARGET_ARCH_IA32

View file

@ -330,6 +330,8 @@ class Assembler : public ValueObject {
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
void movaps(XmmRegister dst, XmmRegister src);
void addsd(XmmRegister dst, XmmRegister src);
void addsd(XmmRegister dst, const Address& src);
void subsd(XmmRegister dst, XmmRegister src);
@ -581,6 +583,7 @@ class Assembler : public ValueObject {
const Code::Comments& GetCodeComments() const;
static const char* RegisterName(Register reg);
static const char* XmmRegisterName(XmmRegister reg);
private:
AssemblerBuffer buffer_;

View file

@ -845,7 +845,19 @@ ASSEMBLER_TEST_GENERATE(DoubleFPMoves, assembler) {
__ movsd(XMM7, XMM6);
__ movl(Address(ESP, 0), Immediate(0));
__ movl(Address(ESP, kWordSize), Immediate(0));
__ movsd(XMM0, Address(ESP, 0));
__ movsd(Address(ESP, 0), XMM7);
__ movsd(XMM7, Address(ESP, 0));
__ movaps(XMM6, XMM7);
__ movaps(XMM5, XMM6);
__ movaps(XMM4, XMM5);
__ movaps(XMM3, XMM4);
__ movaps(XMM2, XMM3);
__ movaps(XMM1, XMM2);
__ movaps(XMM0, XMM1);
__ movl(Address(ESP, 0), Immediate(0));
__ movl(Address(ESP, kWordSize), Immediate(0));
__ movsd(Address(ESP, 0), XMM0);
__ fldl(Address(ESP, 0));
__ popl(EAX);
__ popl(EAX);

View file

@ -19,6 +19,7 @@ DECLARE_FLAG(bool, inline_alloc);
void AssemblerMacros::TryAllocate(Assembler* assembler,
const Class& cls,
Label* failure,
bool near_jump,
Register instance_reg) {
ASSERT(failure != NULL);
if (FLAG_inline_alloc) {
@ -28,7 +29,7 @@ void AssemblerMacros::TryAllocate(Assembler* assembler,
__ addl(instance_reg, Immediate(instance_size));
// instance_reg: potential next object start.
__ cmpl(instance_reg, Address::Absolute(heap->EndAddress()));
__ j(ABOVE_EQUAL, failure, Assembler::kNearJump);
__ j(ABOVE_EQUAL, failure, near_jump);
// Successfully allocated the object, now update top to point to
// next object start and store the class in the class field of object.
__ movl(Address::Absolute(heap->TopAddress()), instance_reg);

View file

@ -30,6 +30,7 @@ class AssemblerMacros : public AllStatic {
static void TryAllocate(Assembler* assembler,
const Class& cls,
Label* failure,
bool near_jump,
Register instance_reg);
// Set up a dart frame on entry with a frame pointer and PC information to

View file

@ -19,6 +19,7 @@ DECLARE_FLAG(bool, inline_alloc);
void AssemblerMacros::TryAllocate(Assembler* assembler,
const Class& cls,
Label* failure,
bool near_jump,
Register instance_reg) {
ASSERT(failure != NULL);
if (FLAG_inline_alloc) {
@ -30,7 +31,7 @@ void AssemblerMacros::TryAllocate(Assembler* assembler,
// instance_reg: potential next object start.
__ movq(TMP, Immediate(heap->EndAddress()));
__ cmpq(instance_reg, Address(TMP, 0));
__ j(ABOVE_EQUAL, failure, Assembler::kNearJump);
__ j(ABOVE_EQUAL, failure, near_jump);
// Successfully allocated the object, now update top to point to
// next object start and store the class in the class field of object.
__ movq(TMP, Immediate(heap->TopAddress()));

View file

@ -30,6 +30,7 @@ class AssemblerMacros : public AllStatic {
static void TryAllocate(Assembler* assembler,
const Class& cls,
Label* failure,
bool near_jump,
Register instance_reg);
// Set up a dart frame on entry with a frame pointer and PC information to

View file

@ -479,6 +479,17 @@ void Assembler::movsd(XmmRegister dst, XmmRegister src) {
}
void Assembler::movaps(XmmRegister dst, XmmRegister src) {
// TODO(vegorov): implement and test XMM8 - XMM15.
ASSERT(src <= XMM7);
ASSERT(dst <= XMM7);
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x28);
EmitXmmRegisterOperand(dst & 7, src);
}
void Assembler::addsd(XmmRegister dst, XmmRegister src) {
// TODO(srdjan): implement and test XMM8 - XMM15.
ASSERT(src <= XMM7);
@ -1921,6 +1932,17 @@ const char* Assembler::RegisterName(Register reg) {
}
static const char* xmm_reg_names[kNumberOfXmmRegisters] = {
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
};
const char* Assembler::XmmRegisterName(XmmRegister reg) {
ASSERT((0 <= reg) && (reg < kNumberOfXmmRegisters));
return xmm_reg_names[reg];
}
} // namespace dart
#endif // defined TARGET_ARCH_X64

View file

@ -343,6 +343,8 @@ class Assembler : public ValueObject {
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
void movaps(XmmRegister dst, XmmRegister src);
void addsd(XmmRegister dst, XmmRegister src);
void subsd(XmmRegister dst, XmmRegister src);
void mulsd(XmmRegister dst, XmmRegister src);
@ -616,6 +618,8 @@ class Assembler : public ValueObject {
static const char* RegisterName(Register reg);
static const char* XmmRegisterName(XmmRegister reg);
private:
AssemblerBuffer buffer_;
int prolog_offset_;

View file

@ -1320,7 +1320,14 @@ ASSEMBLER_TEST_GENERATE(DoubleFPMoves, assembler) {
__ movq(RAX, RSP);
__ movsd(Address(RAX, 0), XMM3);
__ movsd(XMM4, Address(RAX, 0));
__ movsd(XMM0, Address(RSP, 0));
__ movsd(XMM7, Address(RSP, 0));
__ movaps(XMM6, XMM7);
__ movaps(XMM5, XMM6);
__ movaps(XMM4, XMM5);
__ movaps(XMM3, XMM4);
__ movaps(XMM2, XMM3);
__ movaps(XMM1, XMM2);
__ movaps(XMM0, XMM1);
__ popq(RAX);
__ popq(R15); // Callee saved.
__ ret();

View file

@ -177,7 +177,13 @@ static bool CompileParsedFunctionHelper(const ParsedFunction& parsed_function,
}
if (optimized) {
FlowGraphOptimizer optimizer(*flow_graph);
// TODO(vegorov): we need to compute uses for the
// purposes of unboxing. Move unboxing to a later
// stage.
// Compute the use lists.
flow_graph->ComputeUseLists();
FlowGraphOptimizer optimizer(flow_graph);
optimizer.ApplyICData();
// Compute the use lists.

View file

@ -60,6 +60,8 @@ class FlowGraph: public ZoneAllocated {
return graph_entry_;
}
intptr_t alloc_ssa_temp_index() { return current_ssa_temp_index_++; }
// Operations on the flow graph.
void ComputeSSA();
void ComputeUseLists();
@ -96,7 +98,6 @@ class FlowGraph: public ZoneAllocated {
void MarkLivePhis(GrowableArray<PhiInstr*>* live_phis);
intptr_t current_ssa_temp_index() const { return current_ssa_temp_index_; }
intptr_t alloc_ssa_temp_index() { return current_ssa_temp_index_++; }
// DiscoverBlocks computes parent_ and assigned_vars_ which are then used
// if/when computing SSA.

View file

@ -35,6 +35,9 @@ static const intptr_t kTempVirtualRegister = -2;
static const intptr_t kIllegalPosition = -1;
static const intptr_t kMaxPosition = 0x7FFFFFFF;
// Number of stack slots needed for a double spill slot.
static const intptr_t kDoubleSpillSlotFactor = kDoubleSize / kWordSize;
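// For example: on ia32, kDoubleSize is 8 and kWordSize is 4, so a double
// spill slot occupies two word-sized slots (factor 2); on x64 both are 8
// and the factor is 1.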
static intptr_t MinPosition(intptr_t a, intptr_t b) {
return (a < b) ? a : b;
@ -66,15 +69,21 @@ FlowGraphAllocator::FlowGraphAllocator(const FlowGraph& flow_graph)
vreg_count_(flow_graph.max_virtual_register_number()),
live_ranges_(flow_graph.max_virtual_register_number()),
cpu_regs_(),
blocked_cpu_regs_() {
xmm_regs_(),
blocked_cpu_registers_(),
blocked_xmm_registers_(),
cpu_spill_slot_count_(0) {
for (intptr_t i = 0; i < vreg_count_; i++) live_ranges_.Add(NULL);
blocked_cpu_regs_[CTX] = true;
blocked_cpu_registers_[CTX] = true;
if (TMP != kNoRegister) {
blocked_cpu_regs_[TMP] = true;
blocked_cpu_registers_[TMP] = true;
}
blocked_cpu_regs_[SPREG] = true;
blocked_cpu_regs_[FPREG] = true;
blocked_cpu_registers_[SPREG] = true;
blocked_cpu_registers_[FPREG] = true;
// XMM0 is used as scratch by optimized code and parallel move resolver.
blocked_xmm_registers_[XMM0] = true;
}
@ -295,15 +304,13 @@ void FlowGraphAllocator::DumpLiveness() {
void LiveRange::AddUse(intptr_t pos, Location* location_slot) {
ASSERT(location_slot != NULL);
ASSERT((first_use_interval_->start_ <= pos) &&
(pos <= first_use_interval_->end_));
if ((uses_ != NULL) && (uses_->pos() == pos)) {
if ((location_slot == NULL) || (uses_->location_slot() == location_slot)) {
return;
} else if (uses_->location_slot() == NULL) {
uses_->set_location_slot(location_slot);
return;
}
if ((uses_ != NULL) &&
(uses_->pos() == pos) &&
(uses_->location_slot() == location_slot)) {
return;
}
uses_ = new UsePosition(pos, uses_, location_slot);
}
@ -406,22 +413,39 @@ LiveRange* FlowGraphAllocator::MakeLiveRangeForTemporary() {
}
// Block location from the start of the instruction to its end.
void FlowGraphAllocator::BlockLocation(Location loc,
intptr_t from,
intptr_t to) {
ASSERT(loc.IsRegister());
const Register reg = loc.reg();
if (blocked_cpu_regs_[reg]) return;
if (cpu_regs_[reg].length() == 0) {
void FlowGraphAllocator::BlockRegisterLocation(Location loc,
intptr_t from,
intptr_t to,
bool* blocked_registers,
LiveRange** blocking_ranges) {
if (blocked_registers[loc.register_code()]) {
return;
}
if (blocking_ranges[loc.register_code()] == NULL) {
LiveRange* range = new LiveRange(kNoVirtualRegister);
cpu_regs_[reg].Add(range);
blocking_ranges[loc.register_code()] = range;
range->set_assigned_location(loc);
#if defined(DEBUG)
temporaries_.Add(range);
#endif
}
cpu_regs_[reg][0]->AddUseInterval(from, to);
blocking_ranges[loc.register_code()]->AddUseInterval(from, to);
}
// Block location from the start of the instruction to its end.
void FlowGraphAllocator::BlockLocation(Location loc,
intptr_t from,
intptr_t to) {
if (loc.IsRegister()) {
BlockRegisterLocation(loc, from, to, blocked_cpu_registers_, cpu_regs_);
} else if (loc.IsXmmRegister()) {
BlockRegisterLocation(loc, from, to, blocked_xmm_registers_, xmm_regs_);
} else {
UNREACHABLE();
}
}
@ -540,7 +564,9 @@ void FlowGraphAllocator::BuildLiveRanges() {
LiveRange* tail = SplitBetween(range,
graph_entry->start_pos(),
use->pos());
AddToUnallocated(tail);
// All incoming parameters are tagged.
CompleteRange(tail, Location::kRegister);
}
ConvertAllUses(range);
if (flow_graph_.copied_parameter_count() > 0) {
@ -675,7 +701,9 @@ void FlowGraphAllocator::ConnectIncomingPhiMoves(BlockEntryInstr* block) {
// All phi resolution moves are connected. Phi's live range is
// complete.
AssignSafepoints(range);
AddToUnallocated(range);
// TODO(vegorov): unboxed double phis.
CompleteRange(range, Location::kRegister);
move_idx++;
}
@ -732,6 +760,24 @@ void FlowGraphAllocator::ProcessEnvironmentUses(BlockEntryInstr* block,
}
static Location::Kind RegisterKindFromPolicy(Location loc) {
if (loc.policy() == Location::kRequiresXmmRegister) {
return Location::kXmmRegister;
} else {
return Location::kRegister;
}
}
static Location::Kind RegisterKindForResult(Instruction* instr) {
if (instr->representation() == kUnboxedDouble) {
return Location::kXmmRegister;
} else {
return Location::kRegister;
}
}
// Create and update live ranges corresponding to instruction's inputs,
// temporaries and output.
void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
@ -748,7 +794,7 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
// fixed register.
if (locs->out().IsUnallocated() &&
(locs->out().policy() == Location::kSameAsFirstInput) &&
(locs->in(0).IsRegister())) {
(locs->in(0).IsMachineRegister())) {
locs->set_out(locs->in(0));
}
@ -772,7 +818,7 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
Location* in_ref = locs->in_slot(j);
if (in_ref->IsRegister()) {
if (in_ref->IsMachineRegister()) {
// Input is expected in a fixed register. Expected shape of
// live ranges:
//
@ -807,13 +853,13 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
//
Location temp = locs->temp(j);
if (temp.IsRegister()) {
if (temp.IsMachineRegister()) {
BlockLocation(temp, pos, pos + 1);
} else if (temp.IsUnallocated()) {
LiveRange* range = MakeLiveRangeForTemporary();
range->AddUseInterval(pos, pos + 1);
range->AddUse(pos, locs->temp_slot(j));
AddToUnallocated(range);
CompleteRange(range, RegisterKindFromPolicy(temp));
} else {
UNREACHABLE();
}
@ -872,7 +918,7 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
Location* out = locs->out_slot();
// Process output and finalize its liverange.
if (out->IsRegister()) {
if (out->IsMachineRegister()) {
// Fixed output location. Expected shape of live range:
//
// i i' j j'
@ -915,7 +961,8 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
// input #0 --*
// output [----
//
ASSERT(locs->in_slot(0)->Equals(Location::RequiresRegister()));
ASSERT(locs->in(0).Equals(Location::RequiresRegister()) ||
locs->in(0).Equals(Location::RequiresXmmRegister()));
// Create move that will copy value between input and output.
locs->set_out(Location::RequiresRegister());
@ -934,7 +981,7 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
// Shorten output live range to the point of definition and add both input
// and output uses slots to be filled by allocator.
range->DefineAt(pos);
range->AddUse(pos, out);
range->AddHintedUse(pos, out, move->src_slot());
range->AddUse(pos, move->dest_slot());
range->AddUse(pos, locs->in_slot(0));
} else {
@ -944,8 +991,8 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
// i i'
// output [-------
//
ASSERT(out->IsUnallocated() &&
(out->policy() == Location::kRequiresRegister));
ASSERT(locs->out().Equals(Location::RequiresRegister()) ||
locs->out().Equals(Location::RequiresXmmRegister()));
// Shorten live range to the point of definition and add use to be filled by
// allocator.
@ -954,7 +1001,7 @@ void FlowGraphAllocator::ProcessOneInstruction(BlockEntryInstr* block,
}
AssignSafepoints(range);
AddToUnallocated(range);
CompleteRange(range, RegisterKindForResult(current));
}
@ -1149,8 +1196,7 @@ UsePosition* AllocationFinger::FirstRegisterUse(intptr_t after) {
use != NULL;
use = use->next()) {
Location* loc = use->location_slot();
if ((loc != NULL) &&
loc->IsUnallocated() &&
if (loc->IsUnallocated() &&
(loc->policy() == Location::kRequiresRegister)) {
first_register_use_ = use;
return use;
@ -1165,9 +1211,7 @@ UsePosition* AllocationFinger::FirstRegisterBeneficialUse(intptr_t after) {
use != NULL;
use = use->next()) {
Location* loc = use->location_slot();
if ((loc != NULL) &&
(loc->IsRegister() ||
(loc->IsUnallocated() && loc->IsRegisterBeneficial()))) {
if (loc->IsUnallocated() && loc->IsRegisterBeneficial()) {
first_register_beneficial_use_ = use;
return use;
}
@ -1396,7 +1440,13 @@ void FlowGraphAllocator::AllocateSpillSlotFor(LiveRange* range) {
spill_slots_[idx] = last_sibling->End();
range->set_spill_slot(Location::StackSlot(idx));
if (register_kind_ == Location::kRegister) {
range->set_spill_slot(Location::StackSlot(idx));
} else {
range->set_spill_slot(
Location::DoubleStackSlot(
cpu_spill_slot_count_ + idx * kDoubleSpillSlotFactor));
}
spilled_.Add(range);
}
@ -1421,7 +1471,9 @@ void FlowGraphAllocator::Spill(LiveRange* range) {
LiveRange* parent = GetLiveRange(range->vreg());
if (parent->spill_slot().IsInvalid()) {
AllocateSpillSlotFor(parent);
MarkAsObjectAtSafepoints(parent);
if (register_kind_ == Location::kRegister) {
MarkAsObjectAtSafepoints(parent);
}
}
range->set_assigned_location(parent->spill_slot());
ConvertAllUses(range);
@ -1429,10 +1481,10 @@ void FlowGraphAllocator::Spill(LiveRange* range) {
intptr_t FlowGraphAllocator::FirstIntersectionWithAllocated(
Register reg, LiveRange* unallocated) {
intptr_t reg, LiveRange* unallocated) {
intptr_t intersection = kMaxPosition;
for (intptr_t i = 0; i < cpu_regs_[reg].length(); i++) {
LiveRange* allocated = cpu_regs_[reg][i];
for (intptr_t i = 0; i < registers_[reg].length(); i++) {
LiveRange* allocated = registers_[reg][i];
if (allocated == NULL) continue;
UseInterval* allocated_head =
@ -1448,18 +1500,18 @@ intptr_t FlowGraphAllocator::FirstIntersectionWithAllocated(
}
bool FlowGraphAllocator::AllocateFreeRegister(LiveRange* unallocated) {
Register candidate = kNoRegister;
intptr_t candidate = kNoRegister;
intptr_t free_until = 0;
// If hint is available try hint first.
// TODO(vegorov): ensure that phis are hinted on the back edge.
Location hint = unallocated->finger()->FirstHint();
if (hint.IsRegister()) {
if (!blocked_cpu_regs_[hint.reg()]) {
free_until = FirstIntersectionWithAllocated(hint.reg(), unallocated);
candidate = hint.reg();
if (hint.IsMachineRegister()) {
if (!blocked_registers_[hint.register_code()]) {
free_until = FirstIntersectionWithAllocated(hint.register_code(),
unallocated);
candidate = hint.register_code();
}
TRACE_ALLOC(OS::Print("found hint "));
@ -1467,9 +1519,9 @@ bool FlowGraphAllocator::AllocateFreeRegister(LiveRange* unallocated) {
TRACE_ALLOC(OS::Print(" for %d: free until %d\n",
unallocated->vreg(), free_until));
} else if (free_until != kMaxPosition) {
for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; ++reg) {
if (!blocked_cpu_regs_[reg] && cpu_regs_[reg].length() == 0) {
candidate = static_cast<Register>(reg);
for (intptr_t reg = 0; reg < NumberOfRegisters(); ++reg) {
if (!blocked_registers_[reg] && (registers_[reg].length() == 0)) {
candidate = reg;
free_until = kMaxPosition;
break;
}
@ -1478,13 +1530,12 @@ bool FlowGraphAllocator::AllocateFreeRegister(LiveRange* unallocated) {
ASSERT(0 <= kMaxPosition);
if (free_until != kMaxPosition) {
for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; ++reg) {
if (blocked_cpu_regs_[reg] || (reg == candidate)) continue;
for (intptr_t reg = 0; reg < NumberOfRegisters(); ++reg) {
if (blocked_registers_[reg] || (reg == candidate)) continue;
const intptr_t intersection =
FirstIntersectionWithAllocated(static_cast<Register>(reg),
unallocated);
FirstIntersectionWithAllocated(reg, unallocated);
if (intersection > free_until) {
candidate = static_cast<Register>(reg);
candidate = reg;
free_until = intersection;
if (free_until == kMaxPosition) break;
}
@ -1495,7 +1546,7 @@ bool FlowGraphAllocator::AllocateFreeRegister(LiveRange* unallocated) {
if (free_until <= unallocated->Start()) return false;
TRACE_ALLOC(OS::Print("assigning free register "));
TRACE_ALLOC(Location::RegisterLocation(candidate).Print());
TRACE_ALLOC(MakeRegisterLocation(candidate).Print());
TRACE_ALLOC(OS::Print(" to %d\n", unallocated->vreg()));
if (free_until != kMaxPosition) {
@ -1505,8 +1556,8 @@ bool FlowGraphAllocator::AllocateFreeRegister(LiveRange* unallocated) {
AddToUnallocated(tail);
}
cpu_regs_[candidate].Add(unallocated);
unallocated->set_assigned_location(Location::RegisterLocation(candidate));
registers_[candidate].Add(unallocated);
unallocated->set_assigned_location(MakeRegisterLocation(candidate));
return true;
}
@ -1520,17 +1571,14 @@ void FlowGraphAllocator::AllocateAnyRegister(LiveRange* unallocated) {
return;
}
Register candidate = kNoRegister;
intptr_t candidate = kNoRegister;
intptr_t free_until = 0;
intptr_t blocked_at = kMaxPosition;
for (int reg = 0; reg < kNumberOfCpuRegisters; ++reg) {
if (blocked_cpu_regs_[reg]) continue;
if (UpdateFreeUntil(static_cast<Register>(reg),
unallocated,
&free_until,
&blocked_at)) {
candidate = static_cast<Register>(reg);
for (int reg = 0; reg < NumberOfRegisters(); ++reg) {
if (blocked_registers_[reg]) continue;
if (UpdateFreeUntil(reg, unallocated, &free_until, &blocked_at)) {
candidate = reg;
}
}
@ -1542,7 +1590,7 @@ void FlowGraphAllocator::AllocateAnyRegister(LiveRange* unallocated) {
}
TRACE_ALLOC(OS::Print("assigning blocked register "));
TRACE_ALLOC(Location::RegisterLocation(candidate).Print());
TRACE_ALLOC(MakeRegisterLocation(candidate).Print());
TRACE_ALLOC(OS::Print(" to live range %d until %d\n",
unallocated->vreg(), blocked_at));
@ -1559,7 +1607,7 @@ void FlowGraphAllocator::AllocateAnyRegister(LiveRange* unallocated) {
}
bool FlowGraphAllocator::UpdateFreeUntil(Register reg,
bool FlowGraphAllocator::UpdateFreeUntil(intptr_t reg,
LiveRange* unallocated,
intptr_t* cur_free_until,
intptr_t* cur_blocked_at) {
@ -1567,8 +1615,8 @@ bool FlowGraphAllocator::UpdateFreeUntil(Register reg,
intptr_t blocked_at = kMaxPosition;
const intptr_t start = unallocated->Start();
for (intptr_t i = 0; i < cpu_regs_[reg].length(); i++) {
LiveRange* allocated = cpu_regs_[reg][i];
for (intptr_t i = 0; i < registers_[reg].length(); i++) {
LiveRange* allocated = registers_[reg][i];
UseInterval* first_pending_use_interval =
allocated->finger()->first_pending_use_interval();
@ -1616,22 +1664,22 @@ bool FlowGraphAllocator::UpdateFreeUntil(Register reg,
}
void FlowGraphAllocator::RemoveEvicted(Register reg, intptr_t first_evicted) {
void FlowGraphAllocator::RemoveEvicted(intptr_t reg, intptr_t first_evicted) {
intptr_t to = first_evicted;
intptr_t from = first_evicted + 1;
while (from < cpu_regs_[reg].length()) {
LiveRange* allocated = cpu_regs_[reg][from++];
if (allocated != NULL) cpu_regs_[reg][to++] = allocated;
while (from < registers_[reg].length()) {
LiveRange* allocated = registers_[reg][from++];
if (allocated != NULL) registers_[reg][to++] = allocated;
}
cpu_regs_[reg].TruncateTo(to);
registers_[reg].TruncateTo(to);
}
void FlowGraphAllocator::AssignNonFreeRegister(LiveRange* unallocated,
Register reg) {
intptr_t reg) {
intptr_t first_evicted = -1;
for (intptr_t i = cpu_regs_[reg].length() - 1; i >= 0; i--) {
LiveRange* allocated = cpu_regs_[reg][i];
for (intptr_t i = registers_[reg].length() - 1; i >= 0; i--) {
LiveRange* allocated = registers_[reg][i];
if (allocated->vreg() < 0) continue; // Can't be evicted.
if (EvictIntersection(allocated, unallocated)) {
// If allocated was not spilled convert all pending uses.
@ -1639,7 +1687,7 @@ void FlowGraphAllocator::AssignNonFreeRegister(LiveRange* unallocated,
ASSERT(allocated->End() <= unallocated->Start());
ConvertAllUses(allocated);
}
cpu_regs_[reg][i] = NULL;
registers_[reg][i] = NULL;
first_evicted = i;
}
}
@ -1647,8 +1695,8 @@ void FlowGraphAllocator::AssignNonFreeRegister(LiveRange* unallocated,
// Remove evicted ranges from the array.
if (first_evicted != -1) RemoveEvicted(reg, first_evicted);
cpu_regs_[reg].Add(unallocated);
unallocated->set_assigned_location(Location::RegisterLocation(reg));
registers_[reg].Add(unallocated);
unallocated->set_assigned_location(MakeRegisterLocation(reg));
}
@ -1700,9 +1748,6 @@ void FlowGraphAllocator::ConvertUseTo(UsePosition* use, Location loc) {
ASSERT(use->location_slot() != NULL);
Location* slot = use->location_slot();
ASSERT(slot->IsUnallocated());
ASSERT((slot->policy() == Location::kRequiresRegister) ||
(slot->policy() == Location::kPrefersRegister) ||
(slot->policy() == Location::kAny));
TRACE_ALLOC(OS::Print(" use at %d converted to ", use->pos()));
TRACE_ALLOC(loc.Print());
TRACE_ALLOC(OS::Print("\n"));
@ -1712,53 +1757,48 @@ void FlowGraphAllocator::ConvertUseTo(UsePosition* use, Location loc) {
void FlowGraphAllocator::ConvertAllUses(LiveRange* range) {
if (range->vreg() == kNoVirtualRegister) return;
const Location loc = range->assigned_location();
ASSERT(!loc.IsInvalid());
TRACE_ALLOC(OS::Print("range [%d, %d) for v%d has been allocated to ",
range->Start(), range->End(), range->vreg()));
TRACE_ALLOC(range->assigned_location().Print());
TRACE_ALLOC(loc.Print());
TRACE_ALLOC(OS::Print(":\n"));
ASSERT(!range->assigned_location().IsInvalid());
const Location loc = range->assigned_location();
for (UsePosition* use = range->first_use(); use != NULL; use = use->next()) {
ConvertUseTo(use, loc);
}
if (range->assigned_location().IsRegister()) {
Register reg = range->assigned_location().reg();
if (loc.IsMachineRegister()) {
for (SafepointPosition* safepoint = range->first_safepoint();
safepoint != NULL;
safepoint = safepoint->next()) {
safepoint->locs()->live_registers()->Add(reg);
safepoint->locs()->live_registers()->Add(loc);
}
}
}
void FlowGraphAllocator::AdvanceActiveIntervals(const intptr_t start) {
for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; reg++) {
if (cpu_regs_[reg].is_empty()) continue;
for (intptr_t reg = 0; reg < NumberOfRegisters(); reg++) {
if (registers_[reg].is_empty()) continue;
intptr_t first_evicted = -1;
for (intptr_t i = cpu_regs_[reg].length() - 1; i >= 0; i--) {
LiveRange* range = cpu_regs_[reg][i];
for (intptr_t i = registers_[reg].length() - 1; i >= 0; i--) {
LiveRange* range = registers_[reg][i];
if (range->finger()->Advance(start)) {
ConvertAllUses(range);
cpu_regs_[reg][i] = NULL;
registers_[reg][i] = NULL;
first_evicted = i;
}
}
if (first_evicted != -1) {
RemoveEvicted(static_cast<Register>(reg), first_evicted);
}
if (first_evicted != -1) RemoveEvicted(reg, first_evicted);
}
}
static inline bool ShouldBeAllocatedBefore(LiveRange* a, LiveRange* b) {
return a->Start() <= b->Start();
}
bool LiveRange::Contains(intptr_t pos) const {
if (!CanCover(pos)) return false;
@ -1786,21 +1826,49 @@ void FlowGraphAllocator::AssignSafepoints(LiveRange* range) {
}
void FlowGraphAllocator::AddToUnallocated(LiveRange* range) {
static inline bool ShouldBeAllocatedBefore(LiveRange* a, LiveRange* b) {
// TODO(vegorov): consider first hint position when ordering live ranges.
return a->Start() <= b->Start();
}
static void AddToSortedListOfRanges(GrowableArray<LiveRange*>* list,
LiveRange* range) {
range->finger()->Initialize(range);
if (unallocated_.is_empty()) {
unallocated_.Add(range);
if (list->is_empty()) {
list->Add(range);
return;
}
for (intptr_t i = unallocated_.length() - 1; i >= 0; i--) {
if (ShouldBeAllocatedBefore(range, unallocated_[i])) {
unallocated_.InsertAt(i + 1, range);
for (intptr_t i = list->length() - 1; i >= 0; i--) {
if (ShouldBeAllocatedBefore(range, (*list)[i])) {
list->InsertAt(i + 1, range);
return;
}
}
unallocated_.InsertAt(0, range);
list->InsertAt(0, range);
}
void FlowGraphAllocator::AddToUnallocated(LiveRange* range) {
AddToSortedListOfRanges(&unallocated_, range);
}
void FlowGraphAllocator::CompleteRange(LiveRange* range, Location::Kind kind) {
switch (kind) {
case Location::kRegister:
AddToSortedListOfRanges(&unallocated_cpu_, range);
break;
case Location::kXmmRegister:
AddToSortedListOfRanges(&unallocated_xmm_, range);
break;
default:
UNREACHABLE();
}
}
@ -1816,18 +1884,36 @@ bool FlowGraphAllocator::UnallocatedIsSorted() {
#endif
void FlowGraphAllocator::AllocateCPURegisters() {
void FlowGraphAllocator::PrepareForAllocation(
Location::Kind register_kind,
intptr_t number_of_registers,
const GrowableArray<LiveRange*>& unallocated,
LiveRange** blocking_ranges,
bool* blocked_registers) {
register_kind_ = register_kind;
number_of_registers_ = number_of_registers;
ASSERT(unallocated_.is_empty());
unallocated_.AddArray(unallocated);
for (intptr_t reg = 0; reg < number_of_registers; reg++) {
blocked_registers_[reg] = blocked_registers[reg];
ASSERT(registers_[reg].is_empty());
LiveRange* range = blocking_ranges[reg];
if (range != NULL) {
range->finger()->Initialize(range);
registers_[reg].Add(range);
}
}
}
void FlowGraphAllocator::AllocateUnallocatedRanges() {
#if defined(DEBUG)
ASSERT(UnallocatedIsSorted());
#endif
for (intptr_t i = 0; i < kNumberOfCpuRegisters; i++) {
if (cpu_regs_[i].length() == 1) {
LiveRange* range = cpu_regs_[i][0];
range->finger()->Initialize(range);
}
}
while (!unallocated_.is_empty()) {
LiveRange* range = unallocated_.Last();
unallocated_.RemoveLast();
@ -1910,7 +1996,7 @@ void FlowGraphAllocator::ConnectSplitSiblings(LiveRange* parent,
if (source.Equals(target)) return;
// Values are eagerly spilled. Spill slot already contains appropriate value.
if (target.IsStackSlot()) {
if (target.IsStackSlot() || target.IsDoubleStackSlot()) {
ASSERT(parent->spill_slot().Equals(target));
return;
}
@ -1943,6 +2029,7 @@ void FlowGraphAllocator::ResolveControlFlow() {
TRACE_ALLOC(OS::Print("]\n"));
if ((range->End() == sibling->Start()) &&
!sibling->assigned_location().IsStackSlot() &&
!sibling->assigned_location().IsDoubleStackSlot() &&
!range->assigned_location().Equals(sibling->assigned_location()) &&
!IsBlockEntry(range->End())) {
AddMoveAt(sibling->Start(),
@ -1970,7 +2057,8 @@ void FlowGraphAllocator::ResolveControlFlow() {
// this will cause spilling to occur on the fast path (at the definition).
for (intptr_t i = 0; i < spilled_.length(); i++) {
LiveRange* range = spilled_[i];
if (range->assigned_location().IsStackSlot()) {
if (range->assigned_location().IsStackSlot() ||
range->assigned_location().IsDoubleStackSlot()) {
ASSERT(range->assigned_location().Equals(range->spill_slot()));
} else {
AddMoveAt(range->Start() + 1,
@ -2011,13 +2099,42 @@ void FlowGraphAllocator::AllocateRegisters() {
OS::Print("----------------------------------------------\n");
}
AllocateCPURegisters();
PrepareForAllocation(Location::kRegister,
kNumberOfCpuRegisters,
unallocated_cpu_,
cpu_regs_,
blocked_cpu_registers_);
AllocateUnallocatedRanges();
cpu_spill_slot_count_ = spill_slots_.length();
spill_slots_.Clear();
PrepareForAllocation(Location::kXmmRegister,
kNumberOfXmmRegisters,
unallocated_xmm_,
xmm_regs_,
blocked_xmm_registers_);
AllocateUnallocatedRanges();
ResolveControlFlow();
// Reserve spill slots for XMM registers alive across slow path code.
// TODO(vegorov): remove this code when safepoints with registers are
// implemented.
intptr_t deferred_xmm_spills = 0;
for (intptr_t i = 0; i < safepoints_.length(); i++) {
if (!safepoints_[i]->locs()->always_calls()) {
const intptr_t count =
safepoints_[i]->locs()->live_registers()->xmm_regs_count();
if (count > deferred_xmm_spills) deferred_xmm_spills = count;
}
}
GraphEntryInstr* entry = block_order_[0]->AsGraphEntry();
ASSERT(entry != NULL);
entry->set_spill_slot_count(spill_slots_.length());
entry->set_spill_slot_count(
(deferred_xmm_spills + spill_slots_.length()) * kDoubleSpillSlotFactor +
cpu_spill_slot_count_);
if (FLAG_print_ssa_liveranges) {
const Function& function = flow_graph_.parsed_function().function();

View file

@ -94,13 +94,27 @@ class FlowGraphAllocator : public ValueObject {
void ProcessOneInstruction(BlockEntryInstr* block, Instruction* instr);
void ConnectIncomingPhiMoves(BlockEntryInstr* block);
void BlockLocation(Location loc, intptr_t from, intptr_t to);
void BlockRegisterLocation(Location loc,
intptr_t from,
intptr_t to,
bool* blocked_registers,
LiveRange** blocking_ranges);
intptr_t NumberOfRegisters() const { return number_of_registers_; }
// Find all safepoints that are covered by this live range.
void AssignSafepoints(LiveRange* range);
void PrepareForAllocation(Location::Kind register_kind,
intptr_t number_of_registers,
const GrowableArray<LiveRange*>& unallocated,
LiveRange** blocking_ranges,
bool* blocked_registers);
// Process live ranges sorted by their start and assign registers
// to them
void AllocateCPURegisters();
void AllocateUnallocatedRanges();
void AdvanceActiveIntervals(const intptr_t start);
// Connect split siblings over non-linear control flow edges.
@ -118,6 +132,7 @@ class FlowGraphAllocator : public ValueObject {
// Add live range to the list of unallocated live ranges to be processed
// by the allocator.
void AddToUnallocated(LiveRange* range);
void CompleteRange(LiveRange* range, Location::Kind kind);
#if defined(DEBUG)
bool UnallocatedIsSorted();
#endif
@ -135,16 +150,16 @@ class FlowGraphAllocator : public ValueObject {
// evict any interference that can be evicted by splitting and spilling
// parts of interfering live ranges. Place non-spilled parts into
// the list of unallocated ranges.
void AssignNonFreeRegister(LiveRange* unallocated, Register reg);
void AssignNonFreeRegister(LiveRange* unallocated, intptr_t reg);
bool EvictIntersection(LiveRange* allocated, LiveRange* unallocated);
void RemoveEvicted(Register reg, intptr_t first_evicted);
void RemoveEvicted(intptr_t reg, intptr_t first_evicted);
// Find first intersection between unallocated live range and
// live ranges currently allocated to the given register.
intptr_t FirstIntersectionWithAllocated(Register reg,
intptr_t FirstIntersectionWithAllocated(intptr_t reg,
LiveRange* unallocated);
bool UpdateFreeUntil(Register reg,
bool UpdateFreeUntil(intptr_t reg,
LiveRange* unallocated,
intptr_t* cur_free_until,
intptr_t* cur_blocked_at);
@ -171,6 +186,10 @@ class FlowGraphAllocator : public ValueObject {
MoveOperands* AddMoveAt(intptr_t pos, Location to, Location from);
Location MakeRegisterLocation(intptr_t reg) {
return Location::MachineRegisterLocation(register_kind_, reg);
}
void PrintLiveRanges();
const FlowGraph& flow_graph_;
@ -204,9 +223,14 @@ class FlowGraphAllocator : public ValueObject {
// LiveRanges corresponding to SSA values.
GrowableArray<LiveRange*> live_ranges_;
// Worklist for register allocator. Always maintained sorted according
// to ShouldBeAllocatedBefore predicate.
GrowableArray<LiveRange*> unallocated_;
GrowableArray<LiveRange*> unallocated_cpu_;
GrowableArray<LiveRange*> unallocated_xmm_;
LiveRange* cpu_regs_[kNumberOfCpuRegisters];
LiveRange* xmm_regs_[kNumberOfXmmRegisters];
bool blocked_cpu_registers_[kNumberOfCpuRegisters];
bool blocked_xmm_registers_[kNumberOfXmmRegisters];
#if defined(DEBUG)
GrowableArray<LiveRange*> temporaries_;
@ -218,17 +242,28 @@ class FlowGraphAllocator : public ValueObject {
// List of instructions containing calls.
GrowableArray<Instruction*> safepoints_;
Location::Kind register_kind_;
intptr_t number_of_registers_;
// Per register lists of allocated live ranges. Contain only those
// ranges that can be affected by future allocation decisions.
// Those live ranges that end before the start of the current live range are
// removed from the list and will not be affected.
GrowableArray<LiveRange*> cpu_regs_[kNumberOfCpuRegisters];
GrowableArray<LiveRange*> registers_[kNumberOfCpuRegisters];
bool blocked_registers_[kNumberOfCpuRegisters];
// Worklist for register allocator. Always maintained sorted according
// to ShouldBeAllocatedBefore predicate.
GrowableArray<LiveRange*> unallocated_;
// List of used spill slots. Contains positions after which spill slots
// become free and can be reused for allocation.
GrowableArray<intptr_t> spill_slots_;
intptr_t cpu_spill_slot_count_;
bool blocked_cpu_regs_[kNumberOfCpuRegisters];
DISALLOW_COPY_AND_ASSIGN(FlowGraphAllocator);
};
@ -275,7 +310,9 @@ class BlockInfo : public ZoneAllocated {
class UsePosition : public ZoneAllocated {
public:
UsePosition(intptr_t pos, UsePosition* next, Location* location_slot)
: pos_(pos), location_slot_(location_slot), hint_(NULL), next_(next) { }
: pos_(pos), location_slot_(location_slot), hint_(NULL), next_(next) {
ASSERT(location_slot != NULL);
}
Location* location_slot() const { return location_slot_; }
void set_location_slot(Location* location_slot) {

View file

@ -199,27 +199,6 @@ bool FlowGraphCompiler::IsNextBlock(BlockEntryInstr* block_entry) const {
}
void FlowGraphCompiler::SaveLiveRegisters(LocationSummary* locs) {
// TODO(vegorov): consider saving only caller save (volatile) registers.
for (intptr_t reg_idx = 0; reg_idx < kNumberOfCpuRegisters; ++reg_idx) {
Register reg = static_cast<Register>(reg_idx);
if (locs->live_registers()->Contains(reg)) {
assembler()->PushRegister(reg);
}
}
}
void FlowGraphCompiler::RestoreLiveRegisters(LocationSummary* locs) {
for (intptr_t reg_idx = kNumberOfCpuRegisters - 1; reg_idx >= 0; --reg_idx) {
Register reg = static_cast<Register>(reg_idx);
if (locs->live_registers()->Contains(reg)) {
assembler()->PopRegister(reg);
}
}
}
void FlowGraphCompiler::AddSlowPathCode(SlowPathCode* code) {
slow_path_code_.Add(code);
}
@ -648,6 +627,9 @@ void FlowGraphCompiler::AllocateRegistersLocally(Instruction* instr) {
case Location::kSameAsFirstInput:
result_location = locs->in(0);
break;
case Location::kRequiresXmmRegister:
UNREACHABLE();
break;
}
locs->set_out(result_location);
}

View file

@ -880,6 +880,7 @@ void FlowGraphCompiler::GenerateInlinedMathSqrt(Label* done) {
AssemblerMacros::TryAllocate(assembler_,
double_class_,
&call_method,
Assembler::kNearJump,
EAX); // Result register.
__ movsd(FieldAddress(EAX, Double::value_offset()), XMM0);
__ Drop(1);
@ -1106,12 +1107,56 @@ void FlowGraphCompiler::LoadDoubleOrSmiToXmm(XmmRegister result,
}
void FlowGraphCompiler::SaveLiveRegisters(LocationSummary* locs) {
// TODO(vegorov): consider saving only caller save (volatile) registers.
const intptr_t xmm_regs_count = locs->live_registers()->xmm_regs_count();
if (xmm_regs_count > 0) {
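// Pointer maps don't support pushed untagged values so we reserve spill
// slots at the top of the spill slot area for live XMM registers.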
intptr_t stack_offs = (StackSize() + 1) * kWordSize;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsXmmRegister(xmm_reg)) {
__ movsd(Address(EBP, -stack_offs), xmm_reg);
stack_offs += kDoubleSize;
}
}
}
for (intptr_t reg_idx = 0; reg_idx < kNumberOfCpuRegisters; ++reg_idx) {
Register reg = static_cast<Register>(reg_idx);
if (locs->live_registers()->ContainsRegister(reg)) {
__ pushl(reg);
}
}
}
void FlowGraphCompiler::RestoreLiveRegisters(LocationSummary* locs) {
for (intptr_t reg_idx = kNumberOfCpuRegisters - 1; reg_idx >= 0; --reg_idx) {
Register reg = static_cast<Register>(reg_idx);
if (locs->live_registers()->ContainsRegister(reg)) {
__ popl(reg);
}
}
const intptr_t xmm_regs_count = locs->live_registers()->xmm_regs_count();
if (xmm_regs_count > 0) {
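// Pointer maps don't support pushed untagged values so we reserve spill
// slots at the top of the spill slot area for live XMM registers.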
intptr_t stack_offs = (StackSize() + 1) * kWordSize;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsXmmRegister(xmm_reg)) {
__ movsd(xmm_reg, Address(EBP, -stack_offs));
stack_offs += kDoubleSize;
}
}
}
}
#undef __
#define __ compiler_->assembler()->
static Address ToStackSlotAddress(Location loc) {
ASSERT(loc.IsStackSlot());
const intptr_t index = loc.stack_index();
if (index < 0) {
const intptr_t offset = (1 - index) * kWordSize;
@ -1144,6 +1189,23 @@ void ParallelMoveResolver::EmitMove(int index) {
MoveMemoryToMemory(ToStackSlotAddress(destination),
ToStackSlotAddress(source));
}
} else if (source.IsXmmRegister()) {
if (destination.IsXmmRegister()) {
// Optimization manual recommends using MOVAPS for register
// to register moves.
__ movaps(destination.xmm_reg(), source.xmm_reg());
} else {
ASSERT(destination.IsDoubleStackSlot());
__ movsd(ToStackSlotAddress(destination), source.xmm_reg());
}
} else if (source.IsDoubleStackSlot()) {
if (destination.IsXmmRegister()) {
__ movsd(destination.xmm_reg(), ToStackSlotAddress(source));
} else {
ASSERT(destination.IsDoubleStackSlot());
__ movsd(XMM0, ToStackSlotAddress(source));
__ movsd(ToStackSlotAddress(destination), XMM0);
}
} else {
ASSERT(source.IsConstant());
if (destination.IsRegister()) {
@ -1171,6 +1233,20 @@ void ParallelMoveResolver::EmitSwap(int index) {
Exchange(destination.reg(), ToStackSlotAddress(source));
} else if (source.IsStackSlot() && destination.IsStackSlot()) {
Exchange(ToStackSlotAddress(destination), ToStackSlotAddress(source));
} else if (source.IsXmmRegister() && destination.IsXmmRegister()) {
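// XMM0 is never allocated (the register allocator blocks it), so it is
// safe to use as a scratch register for the swap.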
__ movaps(XMM0, source.xmm_reg());
__ movaps(source.xmm_reg(), destination.xmm_reg());
__ movaps(destination.xmm_reg(), XMM0);
} else if (source.IsXmmRegister() || destination.IsXmmRegister()) {
ASSERT(destination.IsDoubleStackSlot() || source.IsDoubleStackSlot());
XmmRegister reg = source.IsXmmRegister() ? source.xmm_reg()
: destination.xmm_reg();
Address slot_address =
ToStackSlotAddress(source.IsXmmRegister() ? destination : source);
__ movsd(XMM0, slot_address);
__ movsd(slot_address, reg);
__ movaps(reg, XMM0);
} else {
UNREACHABLE();
}

View file

@ -885,6 +885,7 @@ void FlowGraphCompiler::GenerateInlinedMathSqrt(Label* done) {
AssemblerMacros::TryAllocate(assembler_,
double_class_,
&call_method,
Assembler::kNearJump,
RAX); // Result register.
__ movsd(FieldAddress(RAX, Double::value_offset()), XMM0);
__ Drop(1);
@ -1114,12 +1115,60 @@ void FlowGraphCompiler::LoadDoubleOrSmiToXmm(XmmRegister result,
}
void FlowGraphCompiler::SaveLiveRegisters(LocationSummary* locs) {
// TODO(vegorov): consider saving only caller save (volatile) registers.
const intptr_t xmm_regs_count = locs->live_registers()->xmm_regs_count();
if (xmm_regs_count > 0) {
// Pointer maps don't support pushed untagged values so we reserve spill
// slots at the top of the spill slot area for live XMM registers.
intptr_t stack_offs = (StackSize() + 1) * kWordSize;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsXmmRegister(xmm_reg)) {
__ movsd(Address(RBP, -stack_offs), xmm_reg);
stack_offs += kDoubleSize;
}
}
}
for (intptr_t reg_idx = 0; reg_idx < kNumberOfCpuRegisters; ++reg_idx) {
Register reg = static_cast<Register>(reg_idx);
if (locs->live_registers()->ContainsRegister(reg)) {
__ pushq(reg);
}
}
}
void FlowGraphCompiler::RestoreLiveRegisters(LocationSummary* locs) {
for (intptr_t reg_idx = kNumberOfCpuRegisters - 1; reg_idx >= 0; --reg_idx) {
Register reg = static_cast<Register>(reg_idx);
if (locs->live_registers()->ContainsRegister(reg)) {
__ popq(reg);
}
}
const intptr_t xmm_regs_count = locs->live_registers()->xmm_regs_count();
if (xmm_regs_count > 0) {
// Pointer maps don't support pushed untagged values so we reserve spill
// slots at the top of the spill slot area for live XMM registers.
intptr_t stack_offs = (StackSize() + 1) * kWordSize;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; ++reg_idx) {
XmmRegister xmm_reg = static_cast<XmmRegister>(reg_idx);
if (locs->live_registers()->ContainsXmmRegister(xmm_reg)) {
__ movsd(xmm_reg, Address(RBP, -stack_offs));
stack_offs += kDoubleSize;
}
}
}
}
#undef __
#define __ compiler_->assembler()->
static Address ToStackSlotAddress(Location loc) {
ASSERT(loc.IsStackSlot());
const intptr_t index = loc.stack_index();
if (index < 0) {
const intptr_t offset = (1 - index) * kWordSize;
@ -1152,6 +1201,23 @@ void ParallelMoveResolver::EmitMove(int index) {
MoveMemoryToMemory(ToStackSlotAddress(destination),
ToStackSlotAddress(source));
}
} else if (source.IsXmmRegister()) {
if (destination.IsXmmRegister()) {
// Optimization manual recommends using MOVAPS for register
// to register moves.
__ movaps(destination.xmm_reg(), source.xmm_reg());
} else {
ASSERT(destination.IsDoubleStackSlot());
__ movsd(ToStackSlotAddress(destination), source.xmm_reg());
}
} else if (source.IsDoubleStackSlot()) {
if (destination.IsXmmRegister()) {
__ movsd(destination.xmm_reg(), ToStackSlotAddress(source));
} else {
ASSERT(destination.IsDoubleStackSlot());
__ movsd(XMM0, ToStackSlotAddress(source));
__ movsd(ToStackSlotAddress(destination), XMM0);
}
} else {
ASSERT(source.IsConstant());
if (destination.IsRegister()) {
@ -1179,6 +1245,20 @@ void ParallelMoveResolver::EmitSwap(int index) {
Exchange(destination.reg(), ToStackSlotAddress(source));
} else if (source.IsStackSlot() && destination.IsStackSlot()) {
Exchange(ToStackSlotAddress(destination), ToStackSlotAddress(source));
} else if (source.IsXmmRegister() && destination.IsXmmRegister()) {
__ movaps(XMM0, source.xmm_reg());
__ movaps(source.xmm_reg(), destination.xmm_reg());
__ movaps(destination.xmm_reg(), XMM0);
} else if (source.IsXmmRegister() || destination.IsXmmRegister()) {
ASSERT(destination.IsDoubleStackSlot() || source.IsDoubleStackSlot());
XmmRegister reg = source.IsXmmRegister() ? source.xmm_reg()
: destination.xmm_reg();
Address slot_address =
ToStackSlotAddress(source.IsXmmRegister() ? destination : source);
__ movsd(XMM0, slot_address);
__ movsd(slot_address, reg);
__ movaps(reg, XMM0);
} else {
UNREACHABLE();
}

View file

@ -20,6 +20,7 @@ DECLARE_FLAG(bool, enable_type_checks);
DEFINE_FLAG(bool, trace_optimization, false, "Print optimization details.");
DECLARE_FLAG(bool, trace_type_check_elimination);
DEFINE_FLAG(bool, use_cha, true, "Use class hierarchy analysis.");
DEFINE_FLAG(bool, use_unboxed_doubles, true, "Try unboxing double values.");
void FlowGraphOptimizer::ApplyICData() {
VisitBlocks();
@ -191,28 +192,15 @@ static intptr_t ReceiverClassId(Computation* comp) {
}
// Insert a check computation before an instruction and set the environment
// of the check to the same as the instruction.
static void InsertCheckBefore(BindInstr* instr,
Computation* check,
Environment* env) {
BindInstr* check_instr = new BindInstr(BindInstr::kUnused, check);
check_instr->InsertBefore(instr);
ASSERT(env != NULL);
// Attach an environment to the check instruction.
check_instr->set_env(env);
}
static void AddCheckClass(BindInstr* instr,
InstanceCallComp* comp,
Value* value) {
void FlowGraphOptimizer::AddCheckClass(BindInstr* instr,
InstanceCallComp* comp,
Value* value) {
// Type propagation has not run yet, so we cannot eliminate the check.
CheckClassComp* check = new CheckClassComp(value, comp);
const ICData& unary_checks =
ICData::ZoneHandle(comp->ic_data()->AsUnaryClassChecks());
check->set_ic_data(&unary_checks);
InsertCheckBefore(instr, check, instr->env());
InsertBefore(instr, check, instr->env(), BindInstr::kUnused);
// Detach environment from the original instruction because it can't
// deoptimize.
instr->set_env(NULL);
@ -258,6 +246,34 @@ bool FlowGraphOptimizer::TryReplaceWithArrayOp(BindInstr* instr,
}
BindInstr* FlowGraphOptimizer::InsertBefore(Instruction* instr,
Computation* comp,
Environment* env,
BindInstr::UseKind use_kind) {
BindInstr* bind = new BindInstr(use_kind, comp);
if (env != NULL) bind->set_env(env->Copy());
if (use_kind == BindInstr::kUsed) {
bind->set_ssa_temp_index(flow_graph_->alloc_ssa_temp_index());
}
bind->InsertBefore(instr);
return bind;
}
BindInstr* FlowGraphOptimizer::InsertAfter(Instruction* instr,
Computation* comp,
Environment* env,
BindInstr::UseKind use_kind) {
BindInstr* bind = new BindInstr(use_kind, comp);
if (env != NULL) bind->set_env(env->Copy());
if (use_kind == BindInstr::kUsed) {
bind->set_ssa_temp_index(flow_graph_->alloc_ssa_temp_index());
}
bind->InsertAfter(instr);
return bind;
}
bool FlowGraphOptimizer::TryReplaceWithBinaryOp(BindInstr* instr,
InstanceCallComp* comp,
Token::Kind op_kind) {
@ -309,9 +325,52 @@ bool FlowGraphOptimizer::TryReplaceWithBinaryOp(BindInstr* instr,
ASSERT(comp->ArgumentCount() == 2);
if (operands_type == kDoubleCid) {
BinaryDoubleOpComp* double_bin_op = new BinaryDoubleOpComp(op_kind, comp);
double_bin_op->set_ic_data(comp->ic_data());
instr->set_computation(double_bin_op);
if (FLAG_use_unboxed_doubles) {
Value* left = comp->ArgumentAt(0)->value();
Value* right = comp->ArgumentAt(1)->value();
// Check that either left or right is not a smi. The result of a
// binary operation on two smis is a smi, not a double.
InsertBefore(instr,
new CheckEitherNonSmiComp(left, right, comp),
instr->env(),
BindInstr::kUnused);
// Unbox operands.
BindInstr* unbox_left = InsertBefore(
instr,
new UnboxDoubleComp(left->CopyValue(), comp),
instr->env(),
BindInstr::kUsed);
BindInstr* unbox_right = InsertBefore(
instr,
new UnboxDoubleComp(right->CopyValue(), comp),
instr->env(),
BindInstr::kUsed);
UnboxedDoubleBinaryOpComp* double_bin_op =
new UnboxedDoubleBinaryOpComp(op_kind,
new UseVal(unbox_left),
new UseVal(unbox_right));
double_bin_op->set_ic_data(comp->ic_data());
instr->set_computation(double_bin_op);
if (instr->is_used()) {
// Box result.
UseVal* use_val = new UseVal(instr);
BindInstr* bind = InsertAfter(instr,
new BoxDoubleComp(use_val, comp),
NULL,
BindInstr::kUsed);
instr->ReplaceUsesWith(bind);
}
RemovePushArguments(comp);
} else {
BinaryDoubleOpComp* double_bin_op = new BinaryDoubleOpComp(op_kind, comp);
double_bin_op->set_ic_data(comp->ic_data());
instr->set_computation(double_bin_op);
}
} else if (operands_type == kMintCid) {
Value* left = comp->ArgumentAt(0)->value();
Value* right = comp->ArgumentAt(1)->value();
@ -328,12 +387,14 @@ bool FlowGraphOptimizer::TryReplaceWithBinaryOp(BindInstr* instr,
Value* right = comp->ArgumentAt(1)->value();
// Insert two smi checks and attach a copy of the original
// environment because the smi operation can still deoptimize.
InsertCheckBefore(instr,
new CheckSmiComp(left->CopyValue(), comp),
instr->env()->Copy());
InsertCheckBefore(instr,
new CheckSmiComp(right->CopyValue(), comp),
instr->env()->Copy());
InsertBefore(instr,
new CheckSmiComp(left->CopyValue(), comp),
instr->env(),
BindInstr::kUnused);
InsertBefore(instr,
new CheckSmiComp(right->CopyValue(), comp),
instr->env(),
BindInstr::kUnused);
BinarySmiOpComp* bin_op = new BinarySmiOpComp(op_kind,
comp,
left,
@ -548,7 +609,7 @@ void FlowGraphOptimizer::VisitInstanceCall(InstanceCallComp* comp,
// Type propagation has not run yet, so we cannot eliminate the check.
CheckClassComp* check = new CheckClassComp(value, comp);
check->set_ic_data(&unary_checks);
InsertCheckBefore(instr, check, instr->env()->Copy());
InsertBefore(instr, check, instr->env(), BindInstr::kUnused);
// Call can still deoptimize, do not detach environment from instr.
call_with_checks = false;
} else {

View file

@ -15,8 +15,9 @@ template <typename T> class DirectChainedHashMap;
class FlowGraphOptimizer : public FlowGraphVisitor {
public:
explicit FlowGraphOptimizer(const FlowGraph& flow_graph)
: FlowGraphVisitor(flow_graph.reverse_postorder()) {}
explicit FlowGraphOptimizer(FlowGraph* flow_graph)
: FlowGraphVisitor(flow_graph->reverse_postorder()),
flow_graph_(flow_graph) { }
virtual ~FlowGraphOptimizer() {}
void ApplyICData();
@ -48,6 +49,20 @@ class FlowGraphOptimizer : public FlowGraphVisitor {
bool TryInlineInstanceMethod(BindInstr* instr, InstanceCallComp* comp);
void AddCheckClass(BindInstr* instr, InstanceCallComp* comp, Value* value);
BindInstr* InsertBefore(Instruction* instr,
Computation* comp,
Environment* env,
BindInstr::UseKind use_kind);
BindInstr* InsertAfter(Instruction* instr,
Computation* comp,
Environment* env,
BindInstr::UseKind use_kind);
FlowGraph* flow_graph_;
DISALLOW_COPY_AND_ASSIGN(FlowGraphOptimizer);
};

View file

@ -397,6 +397,14 @@ void BinaryMintOpComp::PrintOperandsTo(BufferFormatter* f) const {
}
void UnboxedDoubleBinaryOpComp::PrintOperandsTo(BufferFormatter* f) const {
f->Print("%s, ", Token::Str(op_kind()));
left()->PrintTo(f);
f->Print(", ");
right()->PrintTo(f);
}
void BinaryDoubleOpComp::PrintOperandsTo(BufferFormatter* f) const {
f->Print("%s", Token::Str(op_kind()));
}

View file

@ -188,7 +188,7 @@ Instruction* Instruction::RemoveFromGraph(bool return_previous) {
}
void BindInstr::InsertBefore(BindInstr* next) {
void BindInstr::InsertBefore(Instruction* next) {
ASSERT(previous_ == NULL);
ASSERT(next_ == NULL);
next_ = next;
@ -198,6 +198,16 @@ void BindInstr::InsertBefore(BindInstr* next) {
}
void BindInstr::InsertAfter(Instruction* prev) {
ASSERT(previous_ == NULL);
ASSERT(next_ == NULL);
previous_ = prev;
next_ = prev->next_;
next_->previous_ = this;
previous_->next_ = this;
}
void ForwardInstructionIterator::RemoveCurrentFromGraph() {
current_ = current_->RemoveFromGraph(true); // Set current_ to previous.
}
@ -1024,6 +1034,26 @@ intptr_t BinaryDoubleOpComp::ResultCid() const {
}
RawAbstractType* UnboxedDoubleBinaryOpComp::CompileType() const {
return Type::DoubleInterface();
}
RawAbstractType* UnboxDoubleComp::CompileType() const {
return Type::null();
}
intptr_t BoxDoubleComp::ResultCid() const {
return kDoubleCid;
}
RawAbstractType* BoxDoubleComp::CompileType() const {
return Type::DoubleInterface();
}
RawAbstractType* UnarySmiOpComp::CompileType() const {
return Type::SmiType();
}
@ -1055,6 +1085,11 @@ RawAbstractType* CheckSmiComp::CompileType() const {
}
RawAbstractType* CheckEitherNonSmiComp::CompileType() const {
return AbstractType::null();
}
// Optimizations that eliminate or simplify individual computations.
Definition* Computation::TryReplace(BindInstr* instr) const {
return instr;
@ -1090,6 +1125,15 @@ Definition* CheckSmiComp::TryReplace(BindInstr* instr) const {
}
Definition* CheckEitherNonSmiComp::TryReplace(BindInstr* instr) const {
if ((left()->ResultCid() == kDoubleCid) ||
(right()->ResultCid() == kDoubleCid)) {
return NULL; // Remove from the graph.
}
return instr;
}
// Shared code generation methods (EmitNativeCode, MakeLocationSummary, and
// PrepareEntry). Only assembly code that can be shared across all architectures
// can be used. Machine specific register allocation and code generation

View file

@ -104,7 +104,11 @@ class LocalVariable;
M(SmiToDouble, SmiToDoubleComp) \
M(CheckClass, CheckClassComp) \
M(CheckSmi, CheckSmiComp) \
M(Materialize, MaterializeComp)
M(Materialize, MaterializeComp) \
M(CheckEitherNonSmi, CheckEitherNonSmiComp) \
M(UnboxedDoubleBinaryOp, UnboxedDoubleBinaryOpComp) \
M(UnboxDouble, UnboxDoubleComp) \
M(BoxDouble, BoxDoubleComp)
#define FORWARD_DECLARATION(ShortName, ClassName) class ClassName;
@ -122,6 +126,12 @@ class Instruction;
class PushArgumentInstr;
class Value;
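// Representation of a value in generated code: kTagged values are ordinary
// tagged object pointers (including smis), kUnboxedDouble values are raw
// doubles held in XMM registers and spilled to double stack slots.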
enum Representation {
kTagged, kUnboxedDouble
};
class Computation : public ZoneAllocated {
public:
Computation() : deopt_id_(Isolate::kNoDeoptId), ic_data_(NULL), locs_(NULL) {
@ -225,6 +235,10 @@ class Computation : public ZoneAllocated {
virtual ComputationKind computation_kind() const = 0;
virtual Representation representation() const {
return kTagged;
}
// Declare predicate for each computation.
#define DECLARE_PREDICATE(ShortName, ClassName) \
inline bool Is##ShortName() const; \
@ -1662,6 +1676,123 @@ class CatchEntryComp : public TemplateComputation<0> {
};
class CheckEitherNonSmiComp : public TemplateComputation<2> {
public:
CheckEitherNonSmiComp(Value* left,
Value* right,
InstanceCallComp* instance_call)
: instance_call_(instance_call) {
ASSERT(left != NULL);
ASSERT(right != NULL);
inputs_[0] = left;
inputs_[1] = right;
}
DECLARE_COMPUTATION(CheckEitherNonSmi)
virtual bool CanDeoptimize() const { return true; }
virtual bool HasSideEffect() const { return false; }
Value* left() const { return inputs_[0]; }
Value* right() const { return inputs_[1]; }
virtual Definition* TryReplace(BindInstr* instr) const;
private:
InstanceCallComp* instance_call_;
DISALLOW_COPY_AND_ASSIGN(CheckEitherNonSmiComp);
};
class BoxDoubleComp : public TemplateComputation<1> {
public:
BoxDoubleComp(Value* value, InstanceCallComp* instance_call)
: instance_call_(instance_call) {
ASSERT(value != NULL);
inputs_[0] = value;
}
Value* value() const { return inputs_[0]; }
InstanceCallComp* instance_call() const { return instance_call_; }
virtual bool CanDeoptimize() const { return false; }
virtual intptr_t ResultCid() const;
DECLARE_COMPUTATION(BoxDouble)
private:
InstanceCallComp* instance_call_;
DISALLOW_COPY_AND_ASSIGN(BoxDoubleComp);
};
class UnboxDoubleComp : public TemplateComputation<1> {
public:
UnboxDoubleComp(Value* value, InstanceCallComp* instance_call)
: instance_call_(instance_call) {
ASSERT(value != NULL);
inputs_[0] = value;
}
Value* value() const { return inputs_[0]; }
InstanceCallComp* instance_call() const { return instance_call_; }
virtual bool CanDeoptimize() const {
return value()->ResultCid() != kDoubleCid;
}
virtual Representation representation() const {
return kUnboxedDouble;
}
DECLARE_COMPUTATION(UnboxDouble)
private:
InstanceCallComp* instance_call_;
DISALLOW_COPY_AND_ASSIGN(UnboxDoubleComp);
};
class UnboxedDoubleBinaryOpComp : public TemplateComputation<2> {
public:
UnboxedDoubleBinaryOpComp(Token::Kind op_kind,
Value* left,
Value* right)
: op_kind_(op_kind) {
ASSERT(left != NULL);
ASSERT(right != NULL);
inputs_[0] = left;
inputs_[1] = right;
}
Value* left() const { return inputs_[0]; }
Value* right() const { return inputs_[1]; }
Token::Kind op_kind() const { return op_kind_; }
virtual void PrintOperandsTo(BufferFormatter* f) const;
virtual bool CanDeoptimize() const { return false; }
virtual Representation representation() const {
return kUnboxedDouble;
}
DECLARE_COMPUTATION(UnboxedDoubleBinaryOp)
private:
const Token::Kind op_kind_;
DISALLOW_COPY_AND_ASSIGN(UnboxedDoubleBinaryOpComp);
};
class BinarySmiOpComp : public TemplateComputation<2> {
public:
BinarySmiOpComp(Token::Kind op_kind,
@ -2135,6 +2266,10 @@ FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
lifetime_position_ = pos;
}
virtual Representation representation() const {
return kTagged;
}
private:
friend class BindInstr; // Needed for BindInstr::InsertBefore.
@ -2689,7 +2824,14 @@ class BindInstr : public Definition {
virtual void EmitNativeCode(FlowGraphCompiler* compiler);
// Insert this instruction before 'next'.
void InsertBefore(BindInstr* next);
void InsertBefore(Instruction* next);
// Insert this instruction after 'prev'.
void InsertAfter(Instruction* prev);
virtual Representation representation() const {
return computation()->representation();
}
private:
Computation* computation_;

View file

@ -1766,6 +1766,158 @@ void BinaryDoubleOpComp::EmitNativeCode(FlowGraphCompiler* compiler) {
}
LocationSummary* CheckEitherNonSmiComp::MakeLocationSummary() const {
ASSERT((left()->ResultCid() != kDoubleCid) &&
(right()->ResultCid() != kDoubleCid));
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 1;
LocationSummary* summary =
new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresRegister());
summary->set_in(1, Location::RequiresRegister());
summary->set_temp(0, Location::RequiresRegister());
return summary;
}
void CheckEitherNonSmiComp::EmitNativeCode(FlowGraphCompiler* compiler) {
Label* deopt = compiler->AddDeoptStub(instance_call_->deopt_id(),
instance_call_->try_index(),
kDeoptBinaryDoubleOp);
Register temp = locs()->temp(0).reg();
__ movl(temp, locs()->in(0).reg());
__ orl(temp, locs()->in(1).reg());
__ testl(temp, Immediate(kSmiTagMask));
__ j(ZERO, deopt);
}
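The or-and-test sequence relies on the usual Dart tagging, where a smi has its low bit clear: if both inputs are smis, the OR of the two words still has a clear low bit and the code deoptimizes; if at least one input is a heap object (e.g. a boxed Double), the low bit is set and execution falls through. A small host-side sketch of the same check, with plain integers standing in for tagged words (illustrative only, assuming kSmiTagMask == 1):
#include <cassert>
#include <cstdint>

const uintptr_t kSmiTagMask = 1;  // assumed: smis have bit 0 clear, heap objects have it set

bool BothAreSmis(uintptr_t left, uintptr_t right) {
  // Mirrors: orl temp, right; testl temp, kSmiTagMask; j(ZERO, deopt).
  return ((left | right) & kSmiTagMask) == 0;
}

int main() {
  assert(BothAreSmis(0x10, 0x20));    // two smis: the check would deoptimize
  assert(!BothAreSmis(0x10, 0x21));   // one heap object: fast path proceeds
}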
LocationSummary* BoxDoubleComp::MakeLocationSummary() const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary =
new LocationSummary(kNumInputs,
kNumTemps,
LocationSummary::kCallOnSlowPath);
summary->set_in(0, Location::RequiresXmmRegister());
summary->set_out(Location::RequiresRegister());
return summary;
}
class BoxDoubleSlowPath : public SlowPathCode {
public:
explicit BoxDoubleSlowPath(BoxDoubleComp* computation)
: computation_(computation) { }
virtual void EmitNativeCode(FlowGraphCompiler* compiler) {
__ Bind(entry_label());
const Class& double_class = compiler->double_class();
const Code& stub =
Code::Handle(StubCode::GetAllocationStubForClass(double_class));
const ExternalLabel label(double_class.ToCString(), stub.EntryPoint());
// TODO(vegorov): the stack map needs to be set up correctly here to skip
// double registers.
LocationSummary* locs = computation_->locs();
locs->live_registers()->Remove(locs->out());
compiler->SaveLiveRegisters(locs);
compiler->GenerateCall(computation_->instance_call()->token_pos(),
computation_->instance_call()->try_index(),
&label,
PcDescriptors::kOther,
locs);
if (EAX != locs->out().reg()) __ movl(locs->out().reg(), EAX);
compiler->RestoreLiveRegisters(locs);
__ jmp(exit_label());
}
private:
BoxDoubleComp* computation_;
};
void BoxDoubleComp::EmitNativeCode(FlowGraphCompiler* compiler) {
BoxDoubleSlowPath* slow_path = new BoxDoubleSlowPath(this);
compiler->AddSlowPathCode(slow_path);
Register out_reg = locs()->out().reg();
XmmRegister value = locs()->in(0).xmm_reg();
AssemblerMacros::TryAllocate(compiler->assembler(),
compiler->double_class(),
slow_path->entry_label(),
Assembler::kFarJump,
out_reg);
__ Bind(slow_path->exit_label());
__ movsd(FieldAddress(out_reg, Double::value_offset()), value);
}
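The boxing code is a fast-path/slow-path split: TryAllocate bump-allocates the Double inline and, on failure, takes a far jump to the out-of-line BoxDoubleSlowPath, which calls the allocation stub and jumps back; both paths meet at exit_label() before the movsd stores the unboxed value into the box. An annotated outline of the emitted control flow (a sketch of the sequence above, not generated code):
// fast path:
//   out_reg = TryAllocate(Double)            // inline bump allocation
//   if allocation fails -> slow_path          // far jump, slow path is emitted out of line
// exit_label:
//   movsd [out_reg + Double::value_offset], value
//
// slow_path (BoxDoubleSlowPath::EmitNativeCode):
//   save live registers, excluding locs->out()
//   call Double allocation stub               // result arrives in EAX
//   move EAX into out_reg if they differ
//   restore live registers
//   jmp exit_label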
LocationSummary* UnboxDoubleComp::MakeLocationSummary() const {
const intptr_t v_cid = value()->ResultCid();
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = (v_cid != kDoubleCid) ? 1 : 0;
LocationSummary* summary =
new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresRegister());
if (v_cid != kDoubleCid) summary->set_temp(0, Location::RequiresRegister());
summary->set_out(Location::RequiresXmmRegister());
return summary;
}
void UnboxDoubleComp::EmitNativeCode(FlowGraphCompiler* compiler) {
const intptr_t v_cid = value()->ResultCid();
const Register value = locs()->in(0).reg();
const XmmRegister result = locs()->out().xmm_reg();
if (v_cid != kDoubleCid) {
Label* deopt = compiler->AddDeoptStub(instance_call()->deopt_id(),
instance_call()->try_index(),
kDeoptBinaryDoubleOp);
compiler->LoadDoubleOrSmiToXmm(result,
value,
locs()->temp(0).reg(),
deopt);
} else {
__ movsd(result, FieldAddress(value, Double::value_offset()));
}
}
LocationSummary* UnboxedDoubleBinaryOpComp::MakeLocationSummary() const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary =
new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresXmmRegister());
summary->set_in(1, Location::RequiresXmmRegister());
summary->set_out(Location::SameAsFirstInput());
return summary;
}
void UnboxedDoubleBinaryOpComp::EmitNativeCode(FlowGraphCompiler* compiler) {
XmmRegister left = locs()->in(0).xmm_reg();
XmmRegister right = locs()->in(1).xmm_reg();
ASSERT(locs()->out().xmm_reg() == left);
switch (op_kind()) {
case Token::kADD: __ addsd(left, right); break;
case Token::kSUB: __ subsd(left, right); break;
case Token::kMUL: __ mulsd(left, right); break;
case Token::kDIV: __ divsd(left, right); break;
default: UNREACHABLE();
}
}
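SSE2 scalar arithmetic is destructive: `addsd left, right` overwrites `left` with the sum, which is why MakeLocationSummary pins the output to Location::SameAsFirstInput() and the assert above checks that the output register equals the left input. A minimal standalone illustration of the same two-operand shape using compiler intrinsics (not VM code, just the instruction semantics):
#include <emmintrin.h>  // SSE2

// addsd-style form: the destination is also the first source operand.
__m128d AddScalarDouble(__m128d left, __m128d right) {
  // _mm_add_sd(a, b) computes a[0] + b[0] in the low lane and keeps a's high
  // lane, mirroring "addsd left, right" where left is both input and output.
  return _mm_add_sd(left, right);
}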
LocationSummary* UnarySmiOpComp::MakeLocationSummary() const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;

View file

@ -1779,6 +1779,158 @@ void BinaryDoubleOpComp::EmitNativeCode(FlowGraphCompiler* compiler) {
}
LocationSummary* CheckEitherNonSmiComp::MakeLocationSummary() const {
ASSERT((left()->ResultCid() != kDoubleCid) &&
(right()->ResultCid() != kDoubleCid));
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 1;
LocationSummary* summary =
new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresRegister());
summary->set_in(1, Location::RequiresRegister());
summary->set_temp(0, Location::RequiresRegister());
return summary;
}
void CheckEitherNonSmiComp::EmitNativeCode(FlowGraphCompiler* compiler) {
Label* deopt = compiler->AddDeoptStub(instance_call_->deopt_id(),
instance_call_->try_index(),
kDeoptBinaryDoubleOp);
Register temp = locs()->temp(0).reg();
__ movq(temp, locs()->in(0).reg());
__ orq(temp, locs()->in(1).reg());
__ testl(temp, Immediate(kSmiTagMask));
__ j(ZERO, deopt);
}
LocationSummary* BoxDoubleComp::MakeLocationSummary() const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;
LocationSummary* summary =
new LocationSummary(kNumInputs,
kNumTemps,
LocationSummary::kCallOnSlowPath);
summary->set_in(0, Location::RequiresXmmRegister());
summary->set_out(Location::RequiresRegister());
return summary;
}
class BoxDoubleSlowPath : public SlowPathCode {
public:
explicit BoxDoubleSlowPath(BoxDoubleComp* computation)
: computation_(computation) { }
virtual void EmitNativeCode(FlowGraphCompiler* compiler) {
__ Bind(entry_label());
const Class& double_class = compiler->double_class();
const Code& stub =
Code::Handle(StubCode::GetAllocationStubForClass(double_class));
const ExternalLabel label(double_class.ToCString(), stub.EntryPoint());
// TODO(vegorov): the stack map needs to be set up correctly here to skip
// double registers.
LocationSummary* locs = computation_->locs();
locs->live_registers()->Remove(locs->out());
compiler->SaveLiveRegisters(locs);
compiler->GenerateCall(computation_->instance_call()->token_pos(),
computation_->instance_call()->try_index(),
&label,
PcDescriptors::kOther,
locs);
if (RAX != locs->out().reg()) __ movq(locs->out().reg(), RAX);
compiler->RestoreLiveRegisters(locs);
__ jmp(exit_label());
}
private:
BoxDoubleComp* computation_;
};
void BoxDoubleComp::EmitNativeCode(FlowGraphCompiler* compiler) {
BoxDoubleSlowPath* slow_path = new BoxDoubleSlowPath(this);
compiler->AddSlowPathCode(slow_path);
Register out_reg = locs()->out().reg();
XmmRegister value = locs()->in(0).xmm_reg();
AssemblerMacros::TryAllocate(compiler->assembler(),
compiler->double_class(),
slow_path->entry_label(),
Assembler::kFarJump,
out_reg);
__ Bind(slow_path->exit_label());
__ movsd(FieldAddress(out_reg, Double::value_offset()), value);
}
LocationSummary* UnboxDoubleComp::MakeLocationSummary() const {
const intptr_t v_cid = value()->ResultCid();
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = (v_cid != kDoubleCid) ? 1 : 0;
LocationSummary* summary =
new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresRegister());
if (v_cid != kDoubleCid) summary->set_temp(0, Location::RequiresRegister());
summary->set_out(Location::RequiresXmmRegister());
return summary;
}
void UnboxDoubleComp::EmitNativeCode(FlowGraphCompiler* compiler) {
const intptr_t v_cid = value()->ResultCid();
const Register value = locs()->in(0).reg();
const XmmRegister result = locs()->out().xmm_reg();
if (v_cid != kDoubleCid) {
Label* deopt = compiler->AddDeoptStub(instance_call()->deopt_id(),
instance_call()->try_index(),
kDeoptBinaryDoubleOp);
compiler->LoadDoubleOrSmiToXmm(result,
value,
locs()->temp(0).reg(),
deopt);
} else {
__ movsd(result, FieldAddress(value, Double::value_offset()));
}
}
LocationSummary* UnboxedDoubleBinaryOpComp::MakeLocationSummary() const {
const intptr_t kNumInputs = 2;
const intptr_t kNumTemps = 0;
LocationSummary* summary =
new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
summary->set_in(0, Location::RequiresXmmRegister());
summary->set_in(1, Location::RequiresXmmRegister());
summary->set_out(Location::SameAsFirstInput());
return summary;
}
void UnboxedDoubleBinaryOpComp::EmitNativeCode(FlowGraphCompiler* compiler) {
XmmRegister left = locs()->in(0).xmm_reg();
XmmRegister right = locs()->in(1).xmm_reg();
ASSERT(locs()->out().xmm_reg() == left);
switch (op_kind()) {
case Token::kADD: __ addsd(left, right); break;
case Token::kSUB: __ subsd(left, right); break;
case Token::kMUL: __ mulsd(left, right); break;
case Token::kDIV: __ divsd(left, right); break;
default: UNREACHABLE();
}
}
LocationSummary* UnarySmiOpComp::MakeLocationSummary() const {
const intptr_t kNumInputs = 1;
const intptr_t kNumTemps = 0;

View file

@ -785,6 +785,7 @@ bool Intrinsifier::Integer_shl(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
mint_class,
&fall_through,
Assembler::kNearJump,
EAX); // Result register.
// EBX and EDI are not objects but integer values.
__ movl(FieldAddress(EAX, Mint::value_offset()), EBX);
@ -1040,6 +1041,7 @@ static bool DoubleArithmeticOperations(Assembler* assembler, Token::Kind kind) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
EAX); // Result register.
__ movsd(FieldAddress(EAX, Double::value_offset()), XMM0);
__ ret();
@ -1086,6 +1088,7 @@ bool Intrinsifier::Double_mulFromInteger(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
EAX); // Result register.
__ movsd(FieldAddress(EAX, Double::value_offset()), XMM0);
__ ret();
@ -1107,6 +1110,7 @@ bool Intrinsifier::Double_fromInteger(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
EAX); // Result register.
__ movsd(FieldAddress(EAX, Double::value_offset()), XMM0);
__ ret();
@ -1172,6 +1176,7 @@ bool Intrinsifier::Math_sqrt(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
EAX); // Result register.
__ movsd(FieldAddress(EAX, Double::value_offset()), XMM0);
__ ret();
@ -1209,6 +1214,7 @@ static void EmitTrigonometric(Assembler* assembler,
AssemblerMacros::TryAllocate(assembler,
double_class,
&alloc_failed,
Assembler::kNearJump,
EAX); // Result register.
__ fstpl(FieldAddress(EAX, Double::value_offset()));
__ ret();

View file

@ -985,6 +985,7 @@ static bool DoubleArithmeticOperations(Assembler* assembler, Token::Kind kind) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
RAX); // Result register.
__ movsd(FieldAddress(RAX, Double::value_offset()), XMM0);
__ ret();
@ -1030,6 +1031,7 @@ bool Intrinsifier::Double_mulFromInteger(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
RAX); // Result register.
__ movsd(FieldAddress(RAX, Double::value_offset()), XMM0);
__ ret();
@ -1052,6 +1054,7 @@ bool Intrinsifier::Double_fromInteger(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
RAX); // Result register.
__ movsd(FieldAddress(RAX, Double::value_offset()), XMM0);
__ ret();
@ -1129,6 +1132,7 @@ static void EmitTrigonometric(Assembler* assembler,
AssemblerMacros::TryAllocate(assembler,
double_class,
&alloc_failed,
Assembler::kNearJump,
RAX); // Result register.
__ fstpl(FieldAddress(RAX, Double::value_offset()));
__ ret();
@ -1160,6 +1164,7 @@ bool Intrinsifier::Math_sqrt(Assembler* assembler) {
AssemblerMacros::TryAllocate(assembler,
double_class,
&fall_through,
Assembler::kNearJump,
RAX); // Result register.
__ movsd(FieldAddress(RAX, Double::value_offset()), XMM0);
__ ret();

View file

@ -49,7 +49,9 @@ const char* Location::Name() const {
switch (kind()) {
case kInvalid: return "?";
case kRegister: return Assembler::RegisterName(reg());
case kXmmRegister: return Assembler::XmmRegisterName(xmm_reg());
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
case kUnallocated:
switch (policy()) {
case kAny:
@ -58,6 +60,8 @@ const char* Location::Name() const {
return "P";
case kRequiresRegister:
return "R";
case kRequiresXmmRegister:
return "DR";
case kSameAsFirstInput:
return "0";
}

View file

@ -35,6 +35,9 @@ class Location : public ValueObject {
static const intptr_t kStackIndexBias =
static_cast<intptr_t>(1) << (kBitsForPayload - 1);
static const intptr_t kMachineRegisterMask = 0x6;
static const intptr_t kMachineRegister = 0x6;
public:
// Constant payload can overlap with kind field so Kind values
// have to be chosen in a way that their last 2 bits are never
@ -52,13 +55,18 @@ class Location : public ValueObject {
// contains register allocation policy.
kUnallocated = 2,
// Register location represents a fixed register. Payload contains
// register code.
kRegister = 3,
// Spill slot allocated by the register allocator. Payload contains
// a spill index.
kStackSlot = 4,
kStackSlot = 3,
kDoubleStackSlot = 4,
// Register location represents a fixed register. Payload contains
// register code.
kRegister = 6,
// XmmRegister location represents a fixed xmm register. Payload contains
// its code.
kXmmRegister = 7,
};
Location() : value_(kInvalidLocation) {
@ -92,6 +100,7 @@ class Location : public ValueObject {
kAny,
kPrefersRegister,
kRequiresRegister,
kRequiresXmmRegister,
kSameAsFirstInput,
};
@ -120,6 +129,10 @@ class Location : public ValueObject {
return UnallocatedLocation(kRequiresRegister);
}
static Location RequiresXmmRegister() {
return UnallocatedLocation(kRequiresXmmRegister);
}
// The location of the first input to the instruction will be
// used to replace this unallocated location.
static Location SameAsFirstInput() {
@ -150,6 +163,37 @@ class Location : public ValueObject {
return static_cast<Register>(payload());
}
// XmmRegister locations.
static Location XmmRegisterLocation(XmmRegister reg) {
return Location(kXmmRegister, static_cast<uword>(reg));
}
bool IsXmmRegister() const {
return kind() == kXmmRegister;
}
XmmRegister xmm_reg() const {
ASSERT(IsXmmRegister());
return static_cast<XmmRegister>(payload());
}
static bool IsMachineRegisterKind(Kind kind) {
return (kind & kMachineRegisterMask) == kMachineRegister;
}
static Location MachineRegisterLocation(Kind kind, intptr_t reg) {
return Location(kind, reg);
}
bool IsMachineRegister() const {
return IsMachineRegisterKind(kind());
}
intptr_t register_code() const {
ASSERT(IsMachineRegister());
return static_cast<intptr_t>(payload());
}
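The machine-register test is a pure bit trick made possible by the new Kind numbering: kRegister is 6 (0b110) and kXmmRegister is 7 (0b111), so masking with kMachineRegisterMask (0x6) yields kMachineRegister (0x6) for exactly those two kinds, while kInvalid (0), kConstant (1), kUnallocated (2), kStackSlot (3) and kDoubleStackSlot (4) all fall outside. A quick standalone check of the encoding, with the values copied from the enum above:
#include <cassert>

int main() {
  const int kMachineRegisterMask = 0x6, kMachineRegister = 0x6;
  const int kinds[] = {0 /*kInvalid*/, 1 /*kConstant*/, 2 /*kUnallocated*/,
                       3 /*kStackSlot*/, 4 /*kDoubleStackSlot*/,
                       6 /*kRegister*/, 7 /*kXmmRegister*/};
  for (int kind : kinds) {
    bool is_machine_register = (kind & kMachineRegisterMask) == kMachineRegister;
    assert(is_machine_register == (kind == 6 || kind == 7));
  }
}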
// Spill slots.
static Location StackSlot(intptr_t stack_index) {
ASSERT((-kStackIndexBias <= stack_index) &&
@ -164,8 +208,23 @@ class Location : public ValueObject {
return kind() == kStackSlot;
}
static Location DoubleStackSlot(intptr_t stack_index) {
ASSERT((-kStackIndexBias <= stack_index) &&
(stack_index < kStackIndexBias));
Location loc(kDoubleStackSlot,
static_cast<uword>(kStackIndexBias + stack_index));
// Ensure that sign is preserved.
ASSERT(loc.stack_index() == stack_index);
return loc;
}
bool IsDoubleStackSlot() const {
return kind() == kDoubleStackSlot;
}
intptr_t stack_index() const {
ASSERT(IsStackSlot());
ASSERT(IsStackSlot() || IsDoubleStackSlot());
// Decode stack index manually to preserve sign.
return payload() - kStackIndexBias;
}
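Because the payload is an unsigned bit field, negative spill indices are stored with a bias: stack_index + kStackIndexBias goes into the payload, and the bias is subtracted again on the way out, which is what the "preserve sign" comments refer to. A tiny model of the round trip (the 1 << 9 bias below is an assumed payload width, only for illustration):
#include <cassert>
#include <cstdint>

int main() {
  const intptr_t kStackIndexBias = static_cast<intptr_t>(1) << 9;  // assumed width
  for (intptr_t stack_index : {-3, 0, 42}) {
    uintptr_t payload = static_cast<uintptr_t>(kStackIndexBias + stack_index);
    intptr_t decoded = static_cast<intptr_t>(payload) - kStackIndexBias;
    assert(decoded == stack_index);  // the sign survives the unsigned payload
  }
}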
@ -199,7 +258,7 @@ class Location : public ValueObject {
typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField;
// Layout for kUnallocated locations payload.
typedef BitField<Policy, 0, 2> PolicyField;
typedef BitField<Policy, 0, 3> PolicyField;
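Adding kRequiresXmmRegister brings the Policy enum to five values, which no longer fit in the old two-bit field, hence the widening of PolicyField to three bits. A self-contained restatement of the arithmetic (the enum is copied from the declaration above; the static_asserts are illustrative, not part of the change):
enum Policy { kAny, kPrefersRegister, kRequiresRegister,
              kRequiresXmmRegister, kSameAsFirstInput };
static_assert(kSameAsFirstInput < (1 << 3), "five policies fit in 3 bits");
static_assert(kSameAsFirstInput >= (1 << 2), "but no longer fit in 2 bits");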
// Location either contains kind and payload fields or a tagged handle for
// a constant locations. Values of enumeration Kind are selected in such a
@ -210,20 +269,49 @@ class Location : public ValueObject {
class RegisterSet : public ValueObject {
public:
RegisterSet() : registers_(0) {
RegisterSet() : cpu_registers_(0), xmm_registers_(0) {
ASSERT(kNumberOfCpuRegisters < (kWordSize * kBitsPerByte));
ASSERT(kNumberOfXmmRegisters < (kWordSize * kBitsPerByte));
}
void Add(Register reg) {
registers_ |= (1 << reg);
void Add(Location loc) {
if (loc.IsRegister()) {
cpu_registers_ |= (1 << loc.reg());
} else if (loc.IsXmmRegister()) {
xmm_registers_ |= (1 << loc.xmm_reg());
}
}
bool Contains(Register reg) {
return (registers_ & (1 << reg)) != 0;
void Remove(Location loc) {
if (loc.IsRegister()) {
cpu_registers_ &= ~(1 << loc.reg());
} else if (loc.IsXmmRegister()) {
xmm_registers_ &= ~(1 << loc.xmm_reg());
}
}
bool ContainsRegister(Register reg) {
return (cpu_registers_ & (1 << reg)) != 0;
}
bool ContainsXmmRegister(XmmRegister xmm_reg) {
return (xmm_registers_ & (1 << xmm_reg)) != 0;
}
intptr_t xmm_regs_count() {
intptr_t count = 0;
for (intptr_t reg_idx = 0; reg_idx < kNumberOfXmmRegisters; reg_idx++) {
if (ContainsXmmRegister(static_cast<XmmRegister>(reg_idx))) {
count++;
}
}
return count;
}
private:
intptr_t registers_;
intptr_t cpu_registers_;
intptr_t xmm_registers_;
DISALLOW_COPY_AND_ASSIGN(RegisterSet);
};
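RegisterSet now tracks CPU and XMM registers in two separate bit masks, which is what lets the slow-path code above drop only the output location before saving live registers. A short usage sketch against the new interface (a code fragment, not a full program; EAX and XMM1 are the usual IA32 register enumerators, and Location::RegisterLocation is assumed to exist alongside XmmRegisterLocation):
RegisterSet live;
live.Add(Location::RegisterLocation(EAX));         // sets bit EAX in cpu_registers_
live.Add(Location::XmmRegisterLocation(XMM1));     // sets bit 1 in xmm_registers_

ASSERT(live.ContainsRegister(EAX));
ASSERT(live.ContainsXmmRegister(XMM1));
ASSERT(live.xmm_regs_count() == 1);

live.Remove(Location::XmmRegisterLocation(XMM1));  // e.g. drop the output location
ASSERT(!live.ContainsXmmRegister(XMM1));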