Merge branch 'topic/ppc-kvm' into next

Merge our ppc-kvm topic branch. This contains several fixes for the XIVE
interrupt controller that we are sharing with the KVM tree.
This commit is contained in:
Michael Ellerman 2019-08-19 13:19:43 +10:00
commit 1a47908e0f
6 changed files with 167 additions and 54 deletions

View file

@ -46,7 +46,15 @@ struct xive_irq_data {
/* Setup/used by frontend */
int target;
/*
* saved_p means that there is a queue entry for this interrupt
* in some CPU's queue (not including guest vcpu queues), even
* if P is not set in the source ESB.
* stale_p means that there is no queue entry for this interrupt
* in some CPU's queue, even if P is set in the source ESB.
*/
bool saved_p;
bool stale_p;
};
#define XIVE_IRQ_FLAG_STORE_EOI 0x01
#define XIVE_IRQ_FLAG_LSI 0x02

View file

@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
cmpwi r8, 0
beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
@ -2831,29 +2833,39 @@ kvm_cede_prodded:
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
/* Abort if we still have a pending escalation */
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
beq 1f
li r0, 0
stb r0, VCPU_CEDED(r9)
1: /* Enable XIVE escalation */
li r5, XIVE_ESB_SET_PQ_00
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
/* are we using XIVE with single escalation? */
ld r10, VCPU_XIVE_ESC_VADDR(r9)
cmpdi r10, 0
beq 3f
ldx r0, r10, r5
li r6, XIVE_ESB_SET_PQ_00
/*
* If we still have a pending escalation, abort the cede,
* and we must set PQ to 10 rather than 00 so that we don't
* potentially end up with two entries for the escalation
* interrupt in the XIVE interrupt queue. In that case
* we also don't want to set xive_esc_on to 1 here in
* case we race with xive_esc_irq().
*/
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
li r6, XIVE_ESB_SET_PQ_10
b 5f
4: li r0, 1
stb r0, VCPU_XIVE_ESC_ON(r9)
/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
sync
5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
cmpdi r10, 0
beq 3f
ldcix r0, r10, r5
ldcix r0, r10, r6
2: sync
li r0, 1
stb r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont

View file

@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima)
/*
* Nothing to do if the platform doesn't have a XIVE
* or this vCPU doesn't have its own XIVE context
* (e.g. because it's not using an in-kernel interrupt controller).
*/
if (!tima || !vcpu->arch.xive_cam_word)
return;
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
*/
vcpu->arch.xive_esc_on = false;
/* This orders xive_esc_on = false vs. subsequent stale_p = true */
smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
return IRQ_HANDLED;
}
@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.xive_esc_raddr = 0;
}
/*
* In single escalation mode, the escalation interrupt is marked so
* that EOI doesn't re-enable it, but just sets the stale_p flag to
* indicate that the P bit has already been dealt with. However, the
* assembly code that enters the guest sets PQ to 00 without clearing
* stale_p (because it has no easy way to address it). Hence we have
* to adjust stale_p before shutting down the interrupt.
*/
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
/*
* This slightly odd sequence gives the right result
* (i.e. stale_p set if xive_esc_on is false) even if
* we race with xive_esc_irq() and xive_irq_eoi().
*/
xd->stale_p = false;
smp_mb(); /* paired with smb_wmb in xive_esc_irq */
if (!vcpu->arch.xive_esc_on)
xd->stale_p = true;
}
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Mask the VP IPI */
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Free the queues & associated interrupts */
/* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
/* Free the escalation irq */
if (xc->esc_virq[i]) {
if (xc->xive->single_escalation)
xive_cleanup_single_escalation(vcpu, xc,
xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
/* Free the queue */
}
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Clear the cam word so guest entry won't try to push context */
vcpu->arch.xive_cam_word = 0;
/* Free the queues */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,

View file

@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */

View file

@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Free the queues & associated interrupts */
/* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
if (xc->xive->single_escalation)
xive_cleanup_single_escalation(vcpu, xc,
xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
xc->esc_virq[i] = 0;
}
}
/* Free the queue */
/* Disable the VP */
xive_native_disable_vp(xc->vp_id);
/* Clear the cam word so guest entry won't try to push context */
vcpu->arch.xive_cam_word = 0;
/* Free the queues */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
kvmppc_xive_native_cleanup_queue(vcpu, i);
}

View file

@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek)
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
u32 irq = 0;
u8 prio;
u8 prio = 0;
/* Find highest pending priority */
while (xc->pending_prio != 0) {
@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
irq = xive_read_eq(&xc->queue[prio], just_peek);
/* Found something ? That's it */
if (irq)
break;
if (irq) {
if (just_peek || irq_to_desc(irq))
break;
/*
* We should never get here; if we do then we must
* have failed to synchronize the interrupt properly
* when shutting it down.
*/
pr_crit("xive: got interrupt %d without descriptor, dropping\n",
irq);
WARN_ON(1);
continue;
}
/* Clear pending bits */
xc->pending_prio &= ~(1 << prio);
@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
*/
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
xd->stale_p = false;
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
}
}
/* irq_chip eoi callback */
/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data *d)
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
else
xd->stale_p = true;
/*
* Clear saved_p to indicate that it's no longer occupying
@ -397,11 +411,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
*/
if (mask) {
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
xd->saved_p = !!(val & XIVE_ESB_VAL_P);
} else if (xd->saved_p)
if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
xd->saved_p = true;
xd->stale_p = false;
} else if (xd->saved_p) {
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
else
xd->saved_p = false;
} else {
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
xd->stale_p = false;
}
}
/*
@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
int target, rc;
xd->saved_p = false;
xd->stale_p = false;
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
d->irq, hw_irq, d);
@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(struct irq_data *d)
return 0;
}
/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@ -601,16 +623,6 @@ static void xive_irq_shutdown(struct irq_data *d)
/* Mask the interrupt at the source */
xive_do_source_set_mask(xd, true);
/*
* The above may have set saved_p. We clear it otherwise it
* will prevent re-enabling later on. It is ok to forget the
* fact that the interrupt might be in a queue because we are
* accounting that already in xive_dec_target_count() and will
* be re-routing it to a new queue with proper accounting when
* it's started up again
*/
xd->saved_p = false;
/*
* Mask the interrupt in HW in the IVT/EAS and set the number
* to be the "bad" IRQ number
@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq_data *d)
return 1;
}
/*
* Caller holds the irq descriptor lock, so this won't be called
* concurrently with xive_get_irqchip_state on the same interrupt.
*/
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
/* Set it to PQ=10 state to prevent further sends */
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
if (!xd->stale_p) {
xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
xd->stale_p = !xd->saved_p;
}
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* An untargetted interrupt should have been
* also masked at the source
*/
WARN_ON(pq & 2);
WARN_ON(xd->saved_p);
return 0;
}
@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* This saved_p is cleared by the host EOI, when we know
* for sure the queue slot is no longer in use.
*/
if (pq & 2) {
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
xd->saved_p = true;
if (xd->saved_p) {
xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
/*
* Sync the XIVE source HW to ensure the interrupt
@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
} else
xd->saved_p = false;
}
} else {
irqd_clr_forwarded_to_vcpu(d);
@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
return 0;
}
/* Called with irq descriptor lock held. */
static int xive_get_irqchip_state(struct irq_data *data,
enum irqchip_irq_state which, bool *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
switch (which) {
case IRQCHIP_STATE_ACTIVE:
*state = !xd->stale_p &&
(xd->saved_p ||
!!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
return 0;
default:
return -EINVAL;
}
}
static struct irq_chip xive_irq_chip = {
.name = "XIVE-IRQ",
.irq_startup = xive_irq_startup,
@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = {
.irq_set_type = xive_irq_set_type,
.irq_retrigger = xive_irq_retrigger,
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
.irq_get_irqchip_state = xive_get_irqchip_state,
};
bool is_xive_irq(struct irq_chip *chip)
@ -1337,6 +1373,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
raw_spin_lock(&desc->lock);
xd = irq_desc_get_handler_data(desc);
/*
* Clear saved_p to indicate that it's no longer pending
*/
xd->saved_p = false;
/*
* For LSIs, we EOI, this will cause a resend if it's
* still asserted. Otherwise do an MSI retrigger.