KVM: PPC: Book3S HV: Enable use of the new XIVE "single escalation" feature

That feature, provided by Power9 DD2.0 and later, when supported
by newer OPAL versions, allows us to sacrifice a queue (priority 7)
in favor of merging all the escalation interrupts of the queues
of a single VP into a single interrupt.

This reduces the number of host interrupts used up by KVM guests
especially when those guests use multiple priorities.

It will also enable a future change to control the masking of the
escalation interrupts more precisely to avoid spurious ones.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
Benjamin Herrenschmidt 2018-01-12 13:37:12 +11:00 committed by Paul Mackerras
parent c424c10823
commit bf4159da47
5 changed files with 57 additions and 28 deletions

View file

@ -1073,6 +1073,7 @@ enum {
/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
enum {
OPAL_XIVE_VP_ENABLED = 0x00000001,
OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
};
/* "Any chip" replacement for chip ID for allocation functions */

View file

@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id);
extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);
#else

View file

@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO;
}
/*
* Future improvement: start with them disabled
* and handle DD2 and later scheme of merged escalation
* interrupts
*/
name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num, prio);
if (xc->xive->single_escalation)
name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num);
else
name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
prio, xc->server_num);
rc = -ENOMEM;
goto error;
}
pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
IRQF_NO_THREAD, name, vcpu);
if (rc) {
@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
pr_devel("Provisioning prio... %d\n", prio);
/* Provision each VCPU and enable escalations */
/* Provision each VCPU and enable escalations if needed */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
if (rc == 0)
if (rc == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, prio);
if (rc)
return rc;
@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
/* Allocate IPI */
xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) {
pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO;
goto bail;
}
@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
/*
* Enable the VP first as the single escalation mode will
* affect escalation interrupts numbering
*/
r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
if (r) {
pr_err("Failed to enable VP in OPAL, err %d\n", r);
goto bail;
}
/*
* Initialize queues. Initially we set them all for no queueing
* and we enable escalation for queue 0 only which we'll use for
* our mfrr change notifications. If the VCPU is hot-plugged, we
* do handle provisioning however.
* do handle provisioning however based on the existing "map"
* of enabled queues.
*/
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
/* Single escalation, no queue 7 */
if (i == 7 && xive->single_escalation)
break;
/* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
if (r == 0)
if (r == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, i);
if (r)
goto bail;
@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
/* Enable the VP */
r = xive_native_enable_vp(xc->vp_id);
if (r)
goto bail;
/* Route the IPI */
r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
if (!r)
@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
val, server, guest_prio);
/*
* If the source doesn't already have an IPI, allocate
* one and get the corresponding data
@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
if (xive->vp_base == XIVE_INVALID_VP)
ret = -ENOMEM;
xive->single_escalation = xive_native_has_single_escalation();
if (ret) {
kfree(xive);
return ret;

View file

@ -120,6 +120,8 @@ struct kvmppc_xive {
u32 q_order;
u32 q_page_order;
/* Flags */
u8 single_escalation;
};
#define KVMPPC_XIVE_Q_COUNT 8
@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
* is as follow.
*
* Guest request for 0...6 are honored. Guest request for anything
* higher results in a priority of 7 being applied.
*
* However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
* in order to match AIX expectations
* higher results in a priority of 6 being applied.
*
* Similar mapping is done for CPPR values
*/
static inline u8 xive_prio_from_guest(u8 prio)
{
if (prio == 0xff || prio < 8)
if (prio == 0xff || prio < 6)
return prio;
return 7;
return 6;
}
static inline u8 xive_prio_to_guest(u8 prio)
{
if (prio == 0xff || prio < 7)
return prio;
return 0xb;
return prio;
}
static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)

View file

@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;
static bool xive_has_single_esc;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
@ -571,6 +572,10 @@ bool __init xive_native_init(void)
break;
}
/* Do we support single escalation */
if (of_get_property(np, "single-escalation-support", NULL) != NULL)
xive_has_single_esc = true;
/* Configure Thread Management areas for KVM */
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
}
EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
int xive_native_enable_vp(u32 vp_id)
int xive_native_enable_vp(u32 vp_id, bool single_escalation)
{
s64 rc;
u64 flags = OPAL_XIVE_VP_ENABLED;
if (single_escalation)
flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
for (;;) {
rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
rc = opal_xive_set_vp_info(vp_id, flags, 0);
if (rc != OPAL_BUSY)
break;
msleep(1);
@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
return 0;
}
EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
bool xive_native_has_single_escalation(void)
{
return xive_has_single_esc;
}
EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);