Implement guest vcpu pinning using 'pthread_setaffinity_np(3)'.

Prior to this change, pinning was implemented via an ioctl (VM_SET_PINNING)
that called 'sched_bind()' on behalf of the user thread.

The ULE implementation of 'sched_bind()' bumps up 'td_pinned' which in turn
runs afoul of the assertion '(td_pinned == 0)' in userret().

Using the cpuset affinity to implement pinning of the vcpu threads works with
both 4BSD and ULE schedulers and has the happy side-effect of getting rid
of a bunch of code in vmm.ko.

Discussed with:	grehan
This commit is contained in:
Neel Natu 2013-02-11 20:36:07 +00:00
parent f667ff300d
commit 485b3300cc
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=246686
9 changed files with 7 additions and 165 deletions

View file

@ -249,34 +249,6 @@ vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
return (error);
}
/*
 * Query the host cpu that guest vcpu 'vcpu' is pinned to.
 *
 * On success returns 0 and stores the host cpuid (or -1 if the vcpu is
 * unpinned) in '*host_cpuid'.  On failure returns the ioctl(2) error and
 * leaves '*host_cpuid' untouched.
 */
int
vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid)
{
	int error;
	struct vm_pin vmpin;

	bzero(&vmpin, sizeof(vmpin));
	vmpin.vm_cpuid = vcpu;

	error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin);

	/*
	 * Only report a cpuid when the ioctl actually succeeded.  The old
	 * code copied the bzero'ed field (0) out on failure, which callers
	 * that ignore the error (e.g. PPT_TEARDOWN_MSI, which pre-loads -1)
	 * would mistake for "pinned to cpu 0".
	 */
	if (error == 0)
		*host_cpuid = vmpin.host_cpuid;

	return (error);
}
int
vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid)
{
int error;
struct vm_pin vmpin;
bzero(&vmpin, sizeof(vmpin));
vmpin.vm_cpuid = vcpu;
vmpin.host_cpuid = host_cpuid;
error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin);
return (error);
}
int
vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
{

View file

@ -56,8 +56,6 @@ int vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
uint64_t *base, uint32_t *limit, uint32_t *access);
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid);
int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid);
int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
struct vm_exit *ret_vmexit);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);

View file

@ -102,8 +102,6 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc);
int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
int vm_run(struct vm *vm, struct vm_run *vmrun);
int vm_inject_event(struct vm *vm, int vcpu, int type,
int vector, uint32_t error_code, int error_code_valid);

View file

@ -51,11 +51,6 @@ struct vm_seg_desc { /* data or code segment */
struct seg_desc desc;
};
struct vm_pin {
	int vm_cpuid;	/* guest vcpu to (un)pin */
	int host_cpuid; /* -1 to unpin */
};
struct vm_run {
int cpuid;
uint64_t rip; /* start running here */
@ -142,8 +137,6 @@ struct vm_x2apic {
enum {
IOCNUM_RUN,
IOCNUM_SET_PINNING,
IOCNUM_GET_PINNING,
IOCNUM_MAP_MEMORY,
IOCNUM_GET_MEMORY_SEG,
IOCNUM_SET_REGISTER,
@ -168,10 +161,6 @@ enum {
#define VM_RUN \
_IOWR('v', IOCNUM_RUN, struct vm_run)
#define VM_SET_PINNING \
_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
#define VM_GET_PINNING \
_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
#define VM_MAP_MEMORY \
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
#define VM_GET_MEMORY_SEG \

View file

@ -402,31 +402,6 @@ pptintr(void *arg)
return (FILTER_HANDLED);
}
/*
 * XXX
 * Freeing an MSI resource makes the kernel bind the current thread to the
 * host cpu that was originally handling the MSI, and apic_free_vector()
 * panics if the thread is already bound to a cpu.
 *
 * Work around this by dropping the vcpu's pinning for the duration of the
 * teardown and restoring it afterwards.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int saved_pincpu;

	saved_pincpu = -1;
	vm_get_pinning(vm, vcpu, &saved_pincpu);

	if (saved_pincpu < 0) {
		/* Not pinned: nothing to save or restore. */
		ppt_teardown_msi(ppt);
		return;
	}

	vm_set_pinning(vm, vcpu, -1);		/* temporarily unpin */
	ppt_teardown_msi(ppt);
	vm_set_pinning(vm, vcpu, saved_pincpu);	/* re-pin */
}
int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
int destcpu, int vector, int numvec)
@ -447,7 +422,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
return (EBUSY);
/* Free any allocated resources */
PPT_TEARDOWN_MSI(vm, vcpu, ppt);
ppt_teardown_msi(ppt);
if (numvec == 0) /* nothing more to do */
return (0);
@ -513,7 +488,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
}
if (i < numvec) {
PPT_TEARDOWN_MSI(vm, vcpu, ppt);
ppt_teardown_msi(ppt);
return (ENXIO);
}

View file

@ -70,7 +70,6 @@ struct vcpu {
int flags;
enum vcpu_state state;
struct mtx mtx;
int pincpu; /* host cpuid this vcpu is bound to */
int hostcpu; /* host cpuid this vcpu last ran on */
uint64_t guest_msrs[VMM_MSR_NUM];
struct vlapic *vlapic;
@ -81,18 +80,6 @@ struct vcpu {
enum x2apic_state x2apic_state;
int nmi_pending;
};
#define VCPU_F_PINNED 0x0001
#define VCPU_PINCPU(vm, vcpuid) \
((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)
#define VCPU_PIN(vm, vcpuid, host_cpuid) \
do { \
vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \
vm->vcpu[vcpuid].pincpu = host_cpuid; \
} while(0)
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
@ -594,52 +581,6 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
/*
 * Report the host cpu that 'vcpuid' is pinned to via '*cpuid', or -1 if it
 * is not pinned.  Returns EINVAL for an out-of-range vcpuid, else 0.
 */
int
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
{
	if (vcpuid >= 0 && vcpuid < VM_MAXCPU) {
		*cpuid = VCPU_PINCPU(vm, vcpuid);
		return (0);
	}

	return (EINVAL);
}
/*
 * Pin the current thread (assumed to be the one running 'vcpuid') to host
 * cpu 'host_cpuid' via sched_bind(), or unpin it when 'host_cpuid' is -1.
 *
 * Returns 0 on success, EINVAL for an out-of-range vcpuid or an absent
 * host cpu.
 *
 * NOTE(review): per the commit message, sched_bind() under ULE bumps
 * td_pinned, which trips the '(td_pinned == 0)' assertion in userret() —
 * the reason this mechanism is being replaced by cpuset affinity.
 */
int
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
{
	struct thread *td;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	td = curthread;		/* XXXSMP only safe when muxing vcpus */

	/* unpin: clear the flag first, then drop the scheduler binding */
	if (host_cpuid < 0) {
		VCPU_UNPIN(vm, vcpuid);
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
		return (0);
	}

	if (CPU_ABSENT(host_cpuid))
		return (EINVAL);

	/*
	 * XXX we should check that 'host_cpuid' has not already been pinned
	 * by another vm.
	 */
	/* sched_bind() requires the thread lock to be held across the call */
	thread_lock(td);
	sched_bind(td, host_cpuid);
	thread_unlock(td);
	VCPU_PIN(vm, vcpuid, host_cpuid);

	return (0);
}
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

View file

@ -144,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_memory_segment *seg;
struct vm_register *vmreg;
struct vm_seg_desc* vmsegdesc;
struct vm_pin *vmpin;
struct vm_run *vmrun;
struct vm_event *vmevent;
struct vm_lapic_irq *vmirq;
@ -170,7 +169,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
*/
switch (cmd) {
case VM_RUN:
case VM_SET_PINNING:
case VM_GET_REGISTER:
case VM_SET_REGISTER:
case VM_GET_SEGMENT_DESCRIPTOR:
@ -301,16 +299,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
vmirq = (struct vm_lapic_irq *)data;
error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
break;
case VM_SET_PINNING:
vmpin = (struct vm_pin *)data;
error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
vmpin->host_cpuid);
break;
case VM_GET_PINNING:
vmpin = (struct vm_pin *)data;
error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
&vmpin->host_cpuid);
break;
case VM_MAP_MEMORY:
seg = (struct vm_memory_segment *)data;
error = vm_malloc(sc->vm, seg->gpa, seg->len);

View file

@ -520,13 +520,17 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
static void
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
{
cpuset_t mask;
int error, rc, prevcpu;
if (guest_vcpu_mux)
setup_timeslice();
if (pincpu >= 0) {
error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
CPU_ZERO(&mask);
CPU_SET(pincpu + vcpu, &mask);
error = pthread_setaffinity_np(pthread_self(),
sizeof(mask), &mask);
assert(error == 0);
}

View file

@ -183,8 +183,6 @@ usage(void)
" [--get-vmcs-exit-interruption-info]\n"
" [--get-vmcs-exit-interruption-error]\n"
" [--get-vmcs-interruptibility]\n"
" [--set-pinning=<host_cpuid>]\n"
" [--get-pinning]\n"
" [--set-x2apic-state=<state>]\n"
" [--get-x2apic-state]\n"
" [--set-lowmem=<memory below 4GB in units of MB>]\n"
@ -218,7 +216,6 @@ static int set_desc_tr, get_desc_tr;
static int set_desc_ldtr, get_desc_ldtr;
static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
static int set_pinning, get_pinning, pincpu;
static int set_x2apic_state, get_x2apic_state;
enum x2apic_state x2apic_state;
static int run;
@ -374,7 +371,6 @@ enum {
SET_SS,
SET_TR,
SET_LDTR,
SET_PINNING,
SET_X2APIC_STATE,
SET_VMCS_EXCEPTION_BITMAP,
SET_VMCS_ENTRY_INTERRUPTION_INFO,
@ -423,7 +419,6 @@ main(int argc, char *argv[])
{ "set-ss", REQ_ARG, 0, SET_SS },
{ "set-tr", REQ_ARG, 0, SET_TR },
{ "set-ldtr", REQ_ARG, 0, SET_LDTR },
{ "set-pinning",REQ_ARG, 0, SET_PINNING },
{ "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE },
{ "set-vmcs-exception-bitmap",
REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP },
@ -552,7 +547,6 @@ main(int argc, char *argv[])
NO_ARG, &get_vmcs_exit_interruption_error, 1},
{ "get-vmcs-interruptibility",
NO_ARG, &get_vmcs_interruptibility, 1 },
{ "get-pinning",NO_ARG, &get_pinning, 1 },
{ "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 },
{ "get-all", NO_ARG, &get_all, 1 },
{ "run", NO_ARG, &run, 1 },
@ -659,10 +653,6 @@ main(int argc, char *argv[])
ldtr = strtoul(optarg, NULL, 0);
set_ldtr = 1;
break;
case SET_PINNING:
pincpu = strtol(optarg, NULL, 0);
set_pinning = 1;
break;
case SET_X2APIC_STATE:
x2apic_state = strtol(optarg, NULL, 0);
set_x2apic_state = 1;
@ -812,9 +802,6 @@ main(int argc, char *argv[])
if (!error && set_ldtr)
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr);
if (!error && set_pinning)
error = vm_set_pinning(ctx, vcpu, pincpu);
if (!error && set_x2apic_state)
error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
@ -1135,16 +1122,6 @@ main(int argc, char *argv[])
printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr);
}
if (!error && (get_pinning || get_all)) {
error = vm_get_pinning(ctx, vcpu, &pincpu);
if (error == 0) {
if (pincpu < 0)
printf("pincpu[%d]\tunpinned\n", vcpu);
else
printf("pincpu[%d]\t%d\n", vcpu, pincpu);
}
}
if (!error && (get_x2apic_state || get_all)) {
error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
if (error == 0)