bhyve: Split vmexit handling into a separate file

Put it in amd64, since most of it is MD and won't be used on arm64.  Add
a bit of glue to bhyverun.h to make CPU startup and shutdown work
without having to export more global variables.  AP startup will be
reworked further in a future revision.

This makes bhyverun.c much more machine-independent.

No functional change intended.

Reviewed by:	corvink, jhb
MFC after:	1 week
Sponsored by:	Innovate UK
Differential Revision:	https://reviews.freebsd.org/D40556
This commit is contained in:
Mark Johnston 2023-10-04 12:22:56 -04:00
parent a1642451ce
commit 72f9c9d82f
5 changed files with 551 additions and 452 deletions

View file

@ -75,6 +75,7 @@ SRCS= \
usb_mouse.c \
vga.c \
virtio.c \
vmexit.c \
vmgenc.c \
xmsr.c

View file

@ -0,0 +1,493 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <amd64/vmm/intel/vmcs.h>
#include <x86/apicreg.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <vmmapi.h>
#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "gdb.h"
#include "inout.h"
#include "mem.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
#endif
#include "spinup_ap.h"
#include "vmexit.h"
#include "xmsr.h"
static int
vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
int error;
int bytes, port, in;
vme = vmrun->vm_exit;
port = vme->u.inout.port;
bytes = vme->u.inout.bytes;
in = vme->u.inout.in;
error = emulate_inout(ctx, vcpu, vme);
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
port, vme->rip);
return (VMEXIT_ABORT);
} else {
return (VMEXIT_CONTINUE);
}
}
static int
vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
struct vm_exit *vme;
uint64_t val;
uint32_t eax, edx;
int error;
vme = vmrun->vm_exit;
val = 0;
error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
if (error != 0) {
fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
vme->u.msr.code, vcpu_id(vcpu));
if (get_config_bool("x86.strictmsr")) {
vm_inject_gp(vcpu);
return (VMEXIT_CONTINUE);
}
}
eax = val;
error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
assert(error == 0);
edx = val >> 32;
error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
assert(error == 0);
return (VMEXIT_CONTINUE);
}
static int
vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
struct vm_exit *vme;
int error;
vme = vmrun->vm_exit;
error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
if (error != 0) {
fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
if (get_config_bool("x86.strictmsr")) {
vm_inject_gp(vcpu);
return (VMEXIT_CONTINUE);
}
}
return (VMEXIT_CONTINUE);
}
static const char * const vmx_exit_reason_desc[] = {
[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
[EXIT_REASON_EXT_INTR] = "External interrupt",
[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
[EXIT_REASON_INIT] = "INIT signal",
[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
[EXIT_REASON_SMI] = "Other SMI",
[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
[EXIT_REASON_NMI_WINDOW] = "NMI window",
[EXIT_REASON_TASK_SWITCH] = "Task switch",
[EXIT_REASON_CPUID] = "CPUID",
[EXIT_REASON_GETSEC] = "GETSEC",
[EXIT_REASON_HLT] = "HLT",
[EXIT_REASON_INVD] = "INVD",
[EXIT_REASON_INVLPG] = "INVLPG",
[EXIT_REASON_RDPMC] = "RDPMC",
[EXIT_REASON_RDTSC] = "RDTSC",
[EXIT_REASON_RSM] = "RSM",
[EXIT_REASON_VMCALL] = "VMCALL",
[EXIT_REASON_VMCLEAR] = "VMCLEAR",
[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
[EXIT_REASON_VMPTRLD] = "VMPTRLD",
[EXIT_REASON_VMPTRST] = "VMPTRST",
[EXIT_REASON_VMREAD] = "VMREAD",
[EXIT_REASON_VMRESUME] = "VMRESUME",
[EXIT_REASON_VMWRITE] = "VMWRITE",
[EXIT_REASON_VMXOFF] = "VMXOFF",
[EXIT_REASON_VMXON] = "VMXON",
[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
[EXIT_REASON_DR_ACCESS] = "MOV DR",
[EXIT_REASON_INOUT] = "I/O instruction",
[EXIT_REASON_RDMSR] = "RDMSR",
[EXIT_REASON_WRMSR] = "WRMSR",
[EXIT_REASON_INVAL_VMCS] =
"VM-entry failure due to invalid guest state",
[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
[EXIT_REASON_MWAIT] = "MWAIT",
[EXIT_REASON_MTF] = "Monitor trap flag",
[EXIT_REASON_MONITOR] = "MONITOR",
[EXIT_REASON_PAUSE] = "PAUSE",
[EXIT_REASON_MCE_DURING_ENTRY] =
"VM-entry failure due to machine-check event",
[EXIT_REASON_TPR] = "TPR below threshold",
[EXIT_REASON_APIC_ACCESS] = "APIC access",
[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
[EXIT_REASON_EPT_FAULT] = "EPT violation",
[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
[EXIT_REASON_INVEPT] = "INVEPT",
[EXIT_REASON_RDTSCP] = "RDTSCP",
[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
[EXIT_REASON_INVVPID] = "INVVPID",
[EXIT_REASON_WBINVD] = "WBINVD",
[EXIT_REASON_XSETBV] = "XSETBV",
[EXIT_REASON_APIC_WRITE] = "APIC write",
[EXIT_REASON_RDRAND] = "RDRAND",
[EXIT_REASON_INVPCID] = "INVPCID",
[EXIT_REASON_VMFUNC] = "VMFUNC",
[EXIT_REASON_ENCLS] = "ENCLS",
[EXIT_REASON_RDSEED] = "RDSEED",
[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
[EXIT_REASON_XSAVES] = "XSAVES",
[EXIT_REASON_XRSTORS] = "XRSTORS"
};
static const char *
vmexit_vmx_desc(uint32_t exit_reason)
{
if (exit_reason >= nitems(vmx_exit_reason_desc) ||
vmx_exit_reason_desc[exit_reason] == NULL)
return ("Unknown");
return (vmx_exit_reason_desc[exit_reason]);
}
#define DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif
static int
vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
vme = vmrun->vm_exit;
fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
fprintf(stderr, "\treason\t\tVMX\n");
fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status);
fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason,
vmexit_vmx_desc(vme->u.vmx.exit_reason));
fprintf(stderr, "\tqualification\t0x%016lx\n",
vme->u.vmx.exit_qualification);
fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type);
fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error);
#ifdef DEBUG_EPT_MISCONFIG
if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
vm_get_register(vcpu,
VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
&ept_misconfig_gpa);
vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
&ept_misconfig_ptenum);
fprintf(stderr, "\tEPT misconfiguration:\n");
fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
ept_misconfig_ptenum, ept_misconfig_pte[0],
ept_misconfig_pte[1], ept_misconfig_pte[2],
ept_misconfig_pte[3]);
}
#endif /* DEBUG_EPT_MISCONFIG */
return (VMEXIT_ABORT);
}
static int
vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
vme = vmrun->vm_exit;
fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
fprintf(stderr, "\treason\t\tSVM\n");
fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode);
fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1);
fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2);
return (VMEXIT_ABORT);
}
static int
vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun)
{
assert(vmrun->vm_exit->inst_length == 0);
return (VMEXIT_CONTINUE);
}
static int
vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun)
{
assert(vmrun->vm_exit->inst_length == 0);
return (VMEXIT_CONTINUE);
}
static int
vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun __unused)
{
/*
* Just continue execution with the next instruction. We use
* the HLT VM exit as a way to be friendly with the host
* scheduler.
*/
return (VMEXIT_CONTINUE);
}
static int
vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun __unused)
{
return (VMEXIT_CONTINUE);
}
static int
vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
assert(vmrun->vm_exit->inst_length == 0);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
gdb_cpu_mtrap(vcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
return (VMEXIT_CONTINUE);
}
static int
vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
struct vm_exit *vme;
struct vie *vie;
int err, i, cs_d;
enum vm_cpu_mode mode;
vme = vmrun->vm_exit;
vie = &vme->u.inst_emul.vie;
if (!vie->decoded) {
/*
* Attempt to decode in userspace as a fallback. This allows
* updating instruction decode in bhyve without rebooting the
* kernel (rapid prototyping), albeit with much slower
* emulation.
*/
vie_restart(vie);
mode = vme->u.inst_emul.paging.cpu_mode;
cs_d = vme->u.inst_emul.cs_d;
if (vmm_decode_instruction(mode, cs_d, vie) != 0)
goto fail;
if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
vme->rip + vie->num_processed) != 0)
goto fail;
}
err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
&vme->u.inst_emul.paging);
if (err) {
if (err == ESRCH) {
EPRINTLN("Unhandled memory access to 0x%lx\n",
vme->u.inst_emul.gpa);
}
goto fail;
}
return (VMEXIT_CONTINUE);
fail:
fprintf(stderr, "Failed to emulate instruction sequence [ ");
for (i = 0; i < vie->num_valid; i++)
fprintf(stderr, "%02x", vie->inst[i]);
FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
return (VMEXIT_ABORT);
}
static int
vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
enum vm_suspend_how how;
int vcpuid = vcpu_id(vcpu);
vme = vmrun->vm_exit;
how = vme->u.suspended.how;
fbsdrun_deletecpu(vcpuid);
switch (how) {
case VM_SUSPEND_RESET:
exit(0);
case VM_SUSPEND_POWEROFF:
if (get_config_bool_default("destroy_on_poweroff", false))
vm_destroy(ctx);
exit(1);
case VM_SUSPEND_HALT:
exit(2);
case VM_SUSPEND_TRIPLEFAULT:
exit(3);
default:
fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
exit(100);
}
return (0); /* NOTREACHED */
}
static int
vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun __unused)
{
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
gdb_cpu_suspend(vcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
/*
* XXX-MJ sleep for a short period to avoid chewing up the CPU in the
* window between activation of the vCPU thread and the STARTUP IPI.
*/
usleep(1000);
return (VMEXIT_CONTINUE);
}
static int
vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
return (VMEXIT_CONTINUE);
}
static int
vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun)
{
struct vm_exit *vme;
cpuset_t *dmask;
int error = -1;
int i;
dmask = vmrun->cpuset;
vme = vmrun->vm_exit;
switch (vme->u.ipi.mode) {
case APIC_DELMODE_INIT:
CPU_FOREACH_ISSET(i, dmask) {
error = fbsdrun_suspendcpu(i);
if (error) {
warnx("failed to suspend cpu %d", i);
break;
}
}
break;
case APIC_DELMODE_STARTUP:
CPU_FOREACH_ISSET(i, dmask) {
spinup_ap(fbsdrun_vcpu(i),
vme->u.ipi.vector << PAGE_SHIFT);
}
error = 0;
break;
default:
break;
}
return (error);
}
int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);
const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
[VM_EXITCODE_VMX] = vmexit_vmx,
[VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
[VM_EXITCODE_DEBUG] = vmexit_debug,
[VM_EXITCODE_BPT] = vmexit_breakpoint,
[VM_EXITCODE_IPI] = vmexit_ipi,
[VM_EXITCODE_HLT] = vmexit_hlt,
[VM_EXITCODE_PAUSE] = vmexit_pause,
};

View file

@ -41,11 +41,7 @@
#include <sys/un.h>
#endif
#include <amd64/vmm/intel/vmcs.h>
#include <x86/apicreg.h>
#include <machine/atomic.h>
#include <machine/segments.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
@ -73,11 +69,6 @@
#include <libxo/xo.h>
#endif
#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
#include <machine/vmm_dev.h>
#endif
#include <machine/vmm_instruction_emul.h>
#include <vmmapi.h>
#include "bhyverun.h"
@ -112,83 +103,14 @@
#include "snapshot.h"
#endif
#include "tpm_device.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"
#include "vmgenc.h"
#include "vmexit.h"
#include "xmsr.h"
#define MB (1024UL * 1024)
#define GB (1024UL * MB)
static const char * const vmx_exit_reason_desc[] = {
[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
[EXIT_REASON_EXT_INTR] = "External interrupt",
[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
[EXIT_REASON_INIT] = "INIT signal",
[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
[EXIT_REASON_SMI] = "Other SMI",
[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
[EXIT_REASON_NMI_WINDOW] = "NMI window",
[EXIT_REASON_TASK_SWITCH] = "Task switch",
[EXIT_REASON_CPUID] = "CPUID",
[EXIT_REASON_GETSEC] = "GETSEC",
[EXIT_REASON_HLT] = "HLT",
[EXIT_REASON_INVD] = "INVD",
[EXIT_REASON_INVLPG] = "INVLPG",
[EXIT_REASON_RDPMC] = "RDPMC",
[EXIT_REASON_RDTSC] = "RDTSC",
[EXIT_REASON_RSM] = "RSM",
[EXIT_REASON_VMCALL] = "VMCALL",
[EXIT_REASON_VMCLEAR] = "VMCLEAR",
[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
[EXIT_REASON_VMPTRLD] = "VMPTRLD",
[EXIT_REASON_VMPTRST] = "VMPTRST",
[EXIT_REASON_VMREAD] = "VMREAD",
[EXIT_REASON_VMRESUME] = "VMRESUME",
[EXIT_REASON_VMWRITE] = "VMWRITE",
[EXIT_REASON_VMXOFF] = "VMXOFF",
[EXIT_REASON_VMXON] = "VMXON",
[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
[EXIT_REASON_DR_ACCESS] = "MOV DR",
[EXIT_REASON_INOUT] = "I/O instruction",
[EXIT_REASON_RDMSR] = "RDMSR",
[EXIT_REASON_WRMSR] = "WRMSR",
[EXIT_REASON_INVAL_VMCS] =
"VM-entry failure due to invalid guest state",
[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
[EXIT_REASON_MWAIT] = "MWAIT",
[EXIT_REASON_MTF] = "Monitor trap flag",
[EXIT_REASON_MONITOR] = "MONITOR",
[EXIT_REASON_PAUSE] = "PAUSE",
[EXIT_REASON_MCE_DURING_ENTRY] =
"VM-entry failure due to machine-check event",
[EXIT_REASON_TPR] = "TPR below threshold",
[EXIT_REASON_APIC_ACCESS] = "APIC access",
[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
[EXIT_REASON_EPT_FAULT] = "EPT violation",
[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
[EXIT_REASON_INVEPT] = "INVEPT",
[EXIT_REASON_RDTSCP] = "RDTSCP",
[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
[EXIT_REASON_INVVPID] = "INVVPID",
[EXIT_REASON_WBINVD] = "WBINVD",
[EXIT_REASON_XSETBV] = "XSETBV",
[EXIT_REASON_APIC_WRITE] = "APIC write",
[EXIT_REASON_RDRAND] = "RDRAND",
[EXIT_REASON_INVPCID] = "INVPCID",
[EXIT_REASON_VMFUNC] = "VMFUNC",
[EXIT_REASON_ENCLS] = "ENCLS",
[EXIT_REASON_RDSEED] = "RDSEED",
[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
[EXIT_REASON_XSAVES] = "XSAVES",
[EXIT_REASON_XRSTORS] = "XRSTORS"
};
typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *);
int guest_ncpus;
uint16_t cpu_cores, cpu_sockets, cpu_threads;
@ -508,6 +430,12 @@ fbsdrun_virtio_msix(void)
return (get_config_bool_default("virtio_msix", true));
}
struct vcpu *
fbsdrun_vcpu(int vcpuid)
{
return (vcpu_info[vcpuid].vcpu);
}
static void *
fbsdrun_start_thread(void *param)
{
@ -554,7 +482,7 @@ fbsdrun_addcpu(struct vcpu_info *vi)
assert(error == 0);
}
static void
void
fbsdrun_deletecpu(int vcpu)
{
static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
@ -581,376 +509,12 @@ fbsdrun_deletecpu(int vcpu)
pthread_mutex_unlock(&resetcpu_mtx);
}
static int
vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
int
fbsdrun_suspendcpu(int vcpuid)
{
struct vm_exit *vme;
int error;
int bytes, port, in;
vme = vmrun->vm_exit;
port = vme->u.inout.port;
bytes = vme->u.inout.bytes;
in = vme->u.inout.in;
error = emulate_inout(ctx, vcpu, vme);
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
port, vme->rip);
return (VMEXIT_ABORT);
} else {
return (VMEXIT_CONTINUE);
}
return (vm_suspend_cpu(vcpu_info[vcpuid].vcpu));
}
static int
vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
struct vm_exit *vme;
uint64_t val;
uint32_t eax, edx;
int error;
vme = vmrun->vm_exit;
val = 0;
error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
if (error != 0) {
fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
vme->u.msr.code, vcpu_id(vcpu));
if (get_config_bool("x86.strictmsr")) {
vm_inject_gp(vcpu);
return (VMEXIT_CONTINUE);
}
}
eax = val;
error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
assert(error == 0);
edx = val >> 32;
error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
assert(error == 0);
return (VMEXIT_CONTINUE);
}
static int
vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
struct vm_exit *vme;
int error;
vme = vmrun->vm_exit;
error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
if (error != 0) {
fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
if (get_config_bool("x86.strictmsr")) {
vm_inject_gp(vcpu);
return (VMEXIT_CONTINUE);
}
}
return (VMEXIT_CONTINUE);
}
#define DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif
static const char *
vmexit_vmx_desc(uint32_t exit_reason)
{
if (exit_reason >= nitems(vmx_exit_reason_desc) ||
vmx_exit_reason_desc[exit_reason] == NULL)
return ("Unknown");
return (vmx_exit_reason_desc[exit_reason]);
}
static int
vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
vme = vmrun->vm_exit;
fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
fprintf(stderr, "\treason\t\tVMX\n");
fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status);
fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason,
vmexit_vmx_desc(vme->u.vmx.exit_reason));
fprintf(stderr, "\tqualification\t0x%016lx\n",
vme->u.vmx.exit_qualification);
fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type);
fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error);
#ifdef DEBUG_EPT_MISCONFIG
if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
vm_get_register(vcpu,
VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
&ept_misconfig_gpa);
vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
&ept_misconfig_ptenum);
fprintf(stderr, "\tEPT misconfiguration:\n");
fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
ept_misconfig_ptenum, ept_misconfig_pte[0],
ept_misconfig_pte[1], ept_misconfig_pte[2],
ept_misconfig_pte[3]);
}
#endif /* DEBUG_EPT_MISCONFIG */
return (VMEXIT_ABORT);
}
static int
vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
vme = vmrun->vm_exit;
fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
fprintf(stderr, "\treason\t\tSVM\n");
fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode);
fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1);
fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2);
return (VMEXIT_ABORT);
}
static int
vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun)
{
assert(vmrun->vm_exit->inst_length == 0);
return (VMEXIT_CONTINUE);
}
static int
vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun)
{
assert(vmrun->vm_exit->inst_length == 0);
return (VMEXIT_CONTINUE);
}
static int
vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun __unused)
{
/*
* Just continue execution with the next instruction. We use
* the HLT VM exit as a way to be friendly with the host
* scheduler.
*/
return (VMEXIT_CONTINUE);
}
static int
vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun __unused)
{
return (VMEXIT_CONTINUE);
}
static int
vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
assert(vmrun->vm_exit->inst_length == 0);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
gdb_cpu_mtrap(vcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
return (VMEXIT_CONTINUE);
}
static int
vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
struct vm_exit *vme;
struct vie *vie;
int err, i, cs_d;
enum vm_cpu_mode mode;
vme = vmrun->vm_exit;
vie = &vme->u.inst_emul.vie;
if (!vie->decoded) {
/*
* Attempt to decode in userspace as a fallback. This allows
* updating instruction decode in bhyve without rebooting the
* kernel (rapid prototyping), albeit with much slower
* emulation.
*/
vie_restart(vie);
mode = vme->u.inst_emul.paging.cpu_mode;
cs_d = vme->u.inst_emul.cs_d;
if (vmm_decode_instruction(mode, cs_d, vie) != 0)
goto fail;
if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
vme->rip + vie->num_processed) != 0)
goto fail;
}
err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
&vme->u.inst_emul.paging);
if (err) {
if (err == ESRCH) {
EPRINTLN("Unhandled memory access to 0x%lx\n",
vme->u.inst_emul.gpa);
}
goto fail;
}
return (VMEXIT_CONTINUE);
fail:
fprintf(stderr, "Failed to emulate instruction sequence [ ");
for (i = 0; i < vie->num_valid; i++)
fprintf(stderr, "%02x", vie->inst[i]);
FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
return (VMEXIT_ABORT);
}
static int
vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
struct vm_exit *vme;
enum vm_suspend_how how;
int vcpuid = vcpu_id(vcpu);
vme = vmrun->vm_exit;
how = vme->u.suspended.how;
fbsdrun_deletecpu(vcpuid);
switch (how) {
case VM_SUSPEND_RESET:
exit(0);
case VM_SUSPEND_POWEROFF:
if (get_config_bool_default("destroy_on_poweroff", false))
vm_destroy(ctx);
exit(1);
case VM_SUSPEND_HALT:
exit(2);
case VM_SUSPEND_TRIPLEFAULT:
exit(3);
default:
fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
exit(100);
}
return (0); /* NOTREACHED */
}
static int
vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun __unused)
{
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
gdb_cpu_suspend(vcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
/*
* XXX-MJ sleep for a short period to avoid chewing up the CPU in the
* window between activation of the vCPU thread and the STARTUP IPI.
*/
usleep(1000);
return (VMEXIT_CONTINUE);
}
static int
vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
struct vm_run *vmrun)
{
gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
return (VMEXIT_CONTINUE);
}
static int
vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
struct vm_run *vmrun)
{
struct vm_exit *vme;
cpuset_t *dmask;
int error = -1;
int i;
dmask = vmrun->cpuset;
vme = vmrun->vm_exit;
switch (vme->u.ipi.mode) {
case APIC_DELMODE_INIT:
CPU_FOREACH_ISSET(i, dmask) {
error = vm_suspend_cpu(vcpu_info[i].vcpu);
if (error) {
warnx("%s: failed to suspend cpu %d\n",
__func__, i);
break;
}
}
break;
case APIC_DELMODE_STARTUP:
CPU_FOREACH_ISSET(i, dmask) {
spinup_ap(vcpu_info[i].vcpu,
vme->u.ipi.vector << PAGE_SHIFT);
}
error = 0;
break;
default:
break;
}
return (error);
}
int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);
static const vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
[VM_EXITCODE_VMX] = vmexit_vmx,
[VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
[VM_EXITCODE_DEBUG] = vmexit_debug,
[VM_EXITCODE_BPT] = vmexit_breakpoint,
[VM_EXITCODE_IPI] = vmexit_ipi,
[VM_EXITCODE_HLT] = vmexit_hlt,
[VM_EXITCODE_PAUSE] = vmexit_pause,
};
static void
vm_loop(struct vmctx *ctx, struct vcpu *vcpu)
{
@ -973,13 +537,13 @@ vm_loop(struct vmctx *ctx, struct vcpu *vcpu)
break;
exitcode = vme.exitcode;
if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
exitcode);
if (exitcode >= VM_EXITCODE_MAX ||
vmexit_handlers[exitcode] == NULL) {
warnx("vm_loop: unexpected exitcode 0x%x", exitcode);
exit(4);
}
rc = (*handler[exitcode])(ctx, vcpu, &vmrun);
rc = (*vmexit_handlers[exitcode])(ctx, vcpu, &vmrun);
switch (rc) {
case VMEXIT_CONTINUE:

View file

@ -44,6 +44,13 @@ void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr);
#endif
struct vcpu;
struct vcpu *fbsdrun_vcpu(int vcpuid);
void fbsdrun_deletecpu(int vcpuid);
int fbsdrun_suspendcpu(int vcpuid);
int fbsdrun_virtio_msix(void);
typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *);
#endif

34
usr.sbin/bhyve/vmexit.h Normal file
View file

@ -0,0 +1,34 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _VMEXIT_H_
#define _VMEXIT_H_
extern const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX];
#endif /* !_VMEXIT_H_ */