Initial support for bhyve save and restore.

Save and restore (also known as suspend and resume) permits a snapshot
to be taken of a guest's state that can later be resumed.  In the
current implementation, bhyve(8) creates a UNIX domain socket that is
used by bhyvectl(8) to send a request to save a snapshot (and
optionally exit after the snapshot has been taken).  A snapshot
currently consists of two files: the first holds a copy of guest RAM,
and the second file holds other guest state such as vCPU register
values and device model state.

To resume a guest, bhyve(8) must be started with a matching pair of
command line arguments to instantiate the same set of device models as
well as a pointer to the saved snapshot.

While the current implementation is useful for several use cases, it
has a few limitations.  The file format for saving the guest state is
tied to the ABI of internal bhyve structures and is not
self-describing (in that it does not communicate the set of device
models present in the system).  In addition, the state saved for some
device models closely matches the internal data structures which might
prove a challenge for compatibility of snapshot files across a range
of bhyve versions.  The file format also does not currently support
versioning of individual chunks of state.  As a result, the current
file format is not a fixed binary format and future revisions to save
and restore will break binary compatibility of snapshot files.  The
goal is to move to a more flexible format that adds versioning,
etc. and at that point to commit to providing a reasonable level of
compatibility.  As a result, the current implementation is not enabled
by default.  It can be enabled via the WITH_BHYVE_SNAPSHOT=yes option
for userland builds, and the kernel option BHYVE_SNAPSHOT.

Submitted by:	Mihai Tiganus, Flavius Anton, Darius Mihai
Submitted by:	Elena Mihailescu, Mihai Carabas, Sergiu Weisz
Relnotes:	yes
Sponsored by:	University Politehnica of Bucharest
Sponsored by:	Matthew Grooms (student scholarships)
Sponsored by:	iXsystems
Differential Revision:	https://reviews.freebsd.org/D19495
This commit is contained in:
John Baldwin 2020-05-05 00:02:04 +00:00
parent 51a5392297
commit 483d953a86
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=360648
71 changed files with 5615 additions and 48 deletions

View file

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@ -53,8 +54,10 @@ __FBSDID("$FreeBSD$");
#include <libutil.h>
#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_snapshot.h>
#include "vmmapi.h"
@ -237,6 +240,17 @@ vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
return (error);
}
/*
 * Report how guest memory is laid out according to 'ctx'.
 *
 * Copies the context's base address and its low and high memory sizes
 * into the three caller-supplied output locations.  No validation is
 * performed; the function always returns 0.
 */
int
vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
size_t *lowmem_size, size_t *highmem_size)
{
*guest_baseaddr = ctx->baseaddr;
*lowmem_size = ctx->lowmem;
*highmem_size = ctx->highmem;
return (0);
}
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
@ -448,6 +462,34 @@ vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
return (NULL);
}
/*
 * Inverse of vm_map_gpa(): translate a host pointer that lies inside the
 * guest memory mapping back into the guest physical address it backs.
 *
 * The offset of 'addr' from ctx->baseaddr is accepted when it falls in
 * one of two half-open ranges:
 *   - [0, lowmem)               when a low memory segment exists
 *   - [4GB, 4GB + highmem)      when a high memory segment exists
 *
 * Returns the guest physical address, or (vm_paddr_t)-1 when 'addr' does
 * not fall inside either range.
 */
vm_paddr_t
vm_rev_map_gpa(struct vmctx *ctx, void *addr)
{
	vm_paddr_t offaddr;

	/*
	 * vm_paddr_t is an unsigned type on amd64, so a pointer below
	 * baseaddr yields a very large offset that fails both range checks
	 * below; no explicit ">= 0" test is needed.
	 */
	offaddr = (char *)addr - ctx->baseaddr;

	/* The byte at offset 'lowmem' is already past the low segment. */
	if (ctx->lowmem > 0 && offaddr < ctx->lowmem)
		return (offaddr);

	if (ctx->highmem > 0 &&
	    offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem)
		return (offaddr);

	return ((vm_paddr_t)-1);
}
/* TODO: maximum size for vmname */
/*
 * Copy the VM name stored in 'ctx' into 'buf', which holds 'max_len' bytes.
 *
 * Returns 0 when the whole name (including its terminator) fit, or EINVAL
 * when strlcpy() reports that the name had to be truncated.
 */
int
vm_get_name(struct vmctx *ctx, char *buf, size_t max_len)
{
	size_t copied;

	copied = strlcpy(buf, ctx->name, max_len);

	return (copied < max_len ? 0 : EINVAL);
}
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
@ -1501,6 +1543,29 @@ vm_restart_instruction(void *arg, int vcpu)
return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
/*
 * Forward a snapshot request to the kernel through the VM_SNAPSHOT_REQ
 * ioctl on the descriptor held by meta->ctx.
 *
 * Returns 0 on success and -1 when the ioctl fails.  When built with
 * SNAPSHOT_DEBUG the failing device name and errno are logged to stderr.
 */
int
vm_snapshot_req(struct vm_snapshot_meta *meta)
{
	int rc;

	rc = ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, meta);
	if (rc != -1)
		return (0);

#ifdef SNAPSHOT_DEBUG
	fprintf(stderr, "%s: snapshot failed for %s: %d\r\n",
	    __func__, meta->dev_name, errno);
#endif
	return (-1);
}
/*
 * Issue the VM_RESTORE_TIME ioctl on the VM's descriptor.
 *
 * The ioctl is declared with an int argument, so a zeroed placeholder is
 * passed.  Returns whatever ioctl() returns.
 */
int
vm_restore_time(struct vmctx *ctx)
{
	int placeholder = 0;

	return (ioctl(ctx->fd, VM_RESTORE_TIME, &placeholder));
}
int
vm_set_topology(struct vmctx *ctx,
uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)

View file

@ -33,6 +33,7 @@
#include <sys/param.h>
#include <sys/cpuset.h>
#include <machine/vmm_dev.h>
/*
* API version for out-of-tree consumers like grub-bhyve for making compile
@ -42,6 +43,7 @@
struct iovec;
struct vmctx;
struct vm_snapshot_meta;
enum x2apic_state;
/*
@ -88,6 +90,10 @@ int vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
*/
int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
int vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
size_t *lowmem_size, size_t *highmem_size);
/*
* Create a device memory segment identified by 'segid'.
*
@ -110,6 +116,8 @@ void vm_destroy(struct vmctx *ctx);
int vm_parse_memsize(const char *optarg, size_t *memsize);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
/* inverse operation to vm_map_gpa - extract guest address from host pointer */
vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa, int *fault);
@ -120,6 +128,7 @@ uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
int vm_get_memflags(struct vmctx *ctx);
int vm_get_name(struct vmctx *ctx, char *buffer, size_t max_len);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
@ -237,4 +246,24 @@ int vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu,
uint32_t eip, uint32_t gdtbase,
uint32_t esp);
void vm_setup_freebsd_gdt(uint64_t *gdtr);
/*
* Save and restore
*/
#define MAX_SNAPSHOT_VMNAME 100
/*
 * Operation codes for a snapshot request (see struct checkpoint_op).
 *
 * NOTE(review): presumably START_CHECKPOINT takes a snapshot and lets the
 * guest keep running, while START_SUSPEND takes a snapshot and then exits;
 * confirm against the request handler, which is not visible here.
 */
enum checkpoint_opcodes {
START_CHECKPOINT = 0,
START_SUSPEND = 1,
};
/*
 * A snapshot request: an operation code plus the snapshot file name.
 *
 * The file name buffer is MAX_SNAPSHOT_VMNAME bytes long.
 * NOTE(review): 'op' presumably carries an enum checkpoint_opcodes value;
 * confirm against the sender and receiver of this structure.
 */
struct checkpoint_op {
unsigned int op;
char snapshot_filename[MAX_SNAPSHOT_VMNAME];
};
int vm_snapshot_req(struct vm_snapshot_meta *meta);
int vm_restore_time(struct vmctx *ctx);
#endif /* _VMMAPI_H_ */

View file

@ -1,6 +1,6 @@
.\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman.
.\" $FreeBSD$
.Dd April 30, 2020
.Dd May 4, 2020
.Dt SRC.CONF 5
.Os
.Sh NAME
@ -170,6 +170,13 @@ Set to not build or install
associated utilities, and examples.
.Pp
This option only affects amd64/amd64.
.It Va WITH_BHYVE_SNAPSHOT
Set to include support for save and restore (snapshots) in
.Xr bhyve 8
and
.Xr bhyvectl 8 .
.Pp
This option only affects amd64/amd64.
.It Va WITH_BIND_NOW
Build all binaries with the
.Dv DF_BIND_NOW

View file

@ -200,6 +200,7 @@ __DEFAULT_YES_OPTIONS = \
__DEFAULT_NO_OPTIONS = \
BEARSSL \
BHYVE_SNAPSHOT \
BSD_GREP \
CLANG_EXTRAS \
DTRACE_TESTS \

View file

@ -34,6 +34,8 @@
#include <sys/sdt.h>
#include <x86/segments.h>
struct vm_snapshot_meta;
#ifdef _KERNEL
SDT_PROVIDER_DECLARE(vmm);
#endif
@ -152,6 +154,7 @@ struct vmspace;
struct vm_object;
struct vm_guest_paging;
struct pmap;
enum snapshot_req;
struct vm_eventinfo {
void *rptr; /* rendezvous cookie */
@ -180,6 +183,10 @@ typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
/*
 * Snapshot hooks supplied by the hardware backend (see the "checkpoint
 * operations" members of struct vmm_ops).  Each takes the backend's
 * per-VM cookie 'vmi' and returns an int status.
 */
/* Save/restore backend-wide per-VM state through 'meta'. */
typedef int (*vmi_snapshot_t)(void *vmi, struct vm_snapshot_meta *meta);
/* Save/restore the state of a single vCPU, selected by 'vcpu'. */
typedef int (*vmi_snapshot_vmcx_t)(void *vmi, struct vm_snapshot_meta *meta,
int vcpu);
/*
 * Restore TSC state for one vCPU.
 * NOTE(review): the third parameter is named 'now' here, but the AMD
 * implementation treats it as a TSC offset -- confirm intended meaning.
 */
typedef int (*vmi_restore_tsc_t)(void *vmi, int vcpuid, uint64_t now);
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
@ -199,6 +206,11 @@ struct vmm_ops {
vmi_vmspace_free vmspace_free;
vmi_vlapic_init vlapic_init;
vmi_vlapic_cleanup vlapic_cleanup;
/* checkpoint operations */
vmi_snapshot_t vmsnapshot;
vmi_snapshot_vmcx_t vmcx_snapshot;
vmi_restore_tsc_t vm_restore_tsc;
};
extern struct vmm_ops vmm_ops_intel;
@ -272,6 +284,9 @@ void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
int vm_restore_time(struct vm *vm);
#ifdef _SYS__CPUSET_H_
/*
@ -409,6 +424,15 @@ int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
/*
* Function used to keep track of the guest's TSC offset. The
* offset is used by the virtualization extensions to provide a consistent
* value for the Time Stamp Counter to the guest.
*
* Return value is 0 on success and non-zero on failure.
*/
int vm_set_tsc_offset(struct vm *vm, int vcpu_id, uint64_t offset);
enum vm_reg_name vm_segment_name(int seg_encoding);
struct vm_copyinfo {

View file

@ -31,6 +31,8 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
struct vm_snapshot_meta;
#ifdef _KERNEL
void vmmdev_init(void);
int vmmdev_cleanup(void);
@ -312,6 +314,11 @@ enum {
IOCNUM_RTC_WRITE = 101,
IOCNUM_RTC_SETTIME = 102,
IOCNUM_RTC_GETTIME = 103,
/* checkpoint */
IOCNUM_SNAPSHOT_REQ = 113,
IOCNUM_RESTORE_TIME = 115
};
#define VM_RUN \
@ -422,4 +429,8 @@ enum {
_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
#define VM_RESTART_INSTRUCTION \
_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
#define VM_SNAPSHOT_REQ \
_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta)
#define VM_RESTORE_TIME \
_IOWR('v', IOCNUM_RESTORE_TIME, int)
#endif

View file

@ -0,0 +1,156 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2016 Flavius Anton
* Copyright (c) 2016 Mihai Tiganus
* Copyright (c) 2016-2019 Mihai Carabas
* Copyright (c) 2017-2019 Darius Mihai
* Copyright (c) 2017-2019 Elena Mihailescu
* Copyright (c) 2018-2019 Sergiu Weisz
* All rights reserved.
* The bhyve-snapshot feature was developed under sponsorships
* from Matthew Grooms.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VMM_SNAPSHOT_
#define _VMM_SNAPSHOT_
#include <sys/errno.h>
#include <sys/types.h>
#ifndef _KERNEL
#include <stdbool.h>
#endif
struct vmctx;
/*
 * Selects which kernel-side structure a snapshot request refers to; it is
 * carried in the dev_req member of struct vm_snapshot_meta.
 *
 * The enumerators have no explicit values, so their numeric values depend
 * on declaration order.
 */
enum snapshot_req {
STRUCT_VMX,
STRUCT_VIOAPIC,
STRUCT_VM,
STRUCT_VLAPIC,
VM_MEM,
STRUCT_VHPET,
STRUCT_VMCX,
STRUCT_VATPIC,
STRUCT_VATPIT,
STRUCT_VPMTMR,
STRUCT_VRTC,
};
/*
 * Cursor over the memory region that snapshot data is written to or read
 * from.  buf_start/buf_size describe the whole region and are const
 * members; buf/buf_rem track the current position and the space left.
 */
struct vm_snapshot_buffer {
/*
* R/O for device-specific functions;
* written by generic snapshot functions.
*/
uint8_t *const buf_start;
const size_t buf_size;
/*
* R/W for device-specific functions used to keep track of buffer
* current position and remaining size.
*/
uint8_t *buf;
size_t buf_rem;
/*
* Length of the snapshot is either determined as (buf_size - buf_rem)
* or (buf - buf_start) -- the second variation returns a signed value
* so it may not be appropriate.
*
* Use vm_get_snapshot_size(meta).
*/
};
/*
 * Direction of a snapshot operation, stored in vm_snapshot_meta.op.
 * Snapshot routines branch on this value to decide whether state is
 * being saved or restored.
 */
enum vm_snapshot_op {
VM_SNAPSHOT_SAVE,
VM_SNAPSHOT_RESTORE,
};
/*
 * Descriptor handed to every snapshot routine: identifies what is being
 * snapshotted, where the data lives, and in which direction it moves.
 */
struct vm_snapshot_meta {
/* VM context the request belongs to */
struct vmctx *ctx;
/* NOTE(review): presumably opaque per-device state -- confirm with users */
void *dev_data;
const char *dev_name; /* identify userspace devices */
enum snapshot_req dev_req; /* identify kernel structs */
/* data region and cursor for this request */
struct vm_snapshot_buffer buffer;
/* save or restore */
enum vm_snapshot_op op;
};
void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op);
int vm_snapshot_buf(volatile void *data, size_t data_size,
struct vm_snapshot_meta *meta);
size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta);
int vm_snapshot_guest2host_addr(void **addrp, size_t len, bool restore_null,
struct vm_snapshot_meta *meta);
int vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
struct vm_snapshot_meta *meta);
/*
 * Pass LEN bytes at DATA to vm_snapshot_buf() and store its result in RES.
 * On a non-zero result, report the failure through vm_snapshot_buf_err()
 * and jump to LABEL.
 *
 * The text of the DATA argument is stringified into the error report, so
 * the spelling of the expression passed in is visible at runtime.
 */
#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
do { \
(RES) = vm_snapshot_buf((DATA), (LEN), (META)); \
if ((RES) != 0) { \
vm_snapshot_buf_err(#DATA, (META)->op); \
goto LABEL; \
} \
} while (0)
/*
 * Convenience form for a single object: uses its address and sizeof().
 * DATA must therefore be an lvalue whose sizeof() is the amount to copy
 * (not a bit-field, and not a pointer to the real data).
 */
#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \
SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
/*
* Address variables are pointers to guest memory.
*
* When RNULL != 0, do not enforce invalid address checks; instead, make the
* pointer NULL at restore time.
*
* The macro calls vm_snapshot_guest2host_addr() on the address of ADDR and
* stores the result in RES.  On a non-zero result it jumps to LABEL; an
* EFAULT result is additionally reported on stderr.
*
* NOTE(review): the fprintf(stderr, ...) use suggests this macro is only
* usable from userland code -- confirm before using it in the kernel.
*/
#define SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ADDR, LEN, RNULL, META, RES, LABEL) \
do { \
(RES) = vm_snapshot_guest2host_addr((void **)&(ADDR), (LEN), (RNULL), \
(META)); \
if ((RES) != 0) { \
if ((RES) == EFAULT) \
fprintf(stderr, "%s: invalid address: %s\r\n", \
__func__, #ADDR); \
goto LABEL; \
} \
} while (0)
/*
 * compare the value in the meta buffer with the data
 *
 * Calls vm_snapshot_buf_cmp() on LEN bytes at DATA and stores the result
 * in RES; on a non-zero result the failure is reported through
 * vm_snapshot_buf_err() and control jumps to LABEL.
 */
#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
do { \
(RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \
if ((RES) != 0) { \
vm_snapshot_buf_err(#DATA, (META)->op); \
goto LABEL; \
} \
} while (0)
/* Single-object form: compares using the object's address and sizeof(). */
#define SNAPSHOT_VAR_CMP_OR_LEAVE(DATA, META, RES, LABEL) \
SNAPSHOT_BUF_CMP_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
#endif

View file

@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
@ -50,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@ -276,6 +279,25 @@ svm_restore(void)
svm_enable(NULL);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Program a new TSC offset for 'vcpu'.
 *
 * The offset is written into the vCPU's VMCB control area, the
 * VMCB_CACHE_I state is marked dirty, and the value is then recorded with
 * the generic layer through vm_set_tsc_offset(), whose status is returned.
 */
int
svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset)
{
	struct vmcb_ctrl *vmcb_ctrl;

	vmcb_ctrl = svm_get_vmcb_ctrl(sc, vcpu);
	vmcb_ctrl->tsc_offset = offset;

	svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
	VCPU_CTR1(sc->vm, vcpu, "tsc offset changed to %#lx", offset);

	return (vm_set_tsc_offset(sc->vm, vcpu, offset));
}
#endif
/* Pentium compatible MSRs */
#define MSR_PENTIUM_START 0
#define MSR_PENTIUM_END 0x1FFF
@ -2203,6 +2225,36 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
return (EINVAL);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore a single guest register identified by 'ident'.
 *
 * On save the register is read with svm_getreg() and then handed to the
 * snapshot buffer; on restore the value is taken from the snapshot buffer
 * and written back with svm_setreg().  Any other operation yields EINVAL.
 * Returns 0 on success or the first error encountered.
 */
static int
svm_snapshot_reg(void *arg, int vcpu, int ident,
    struct vm_snapshot_meta *meta)
{
	uint64_t val;
	int ret;

	switch (meta->op) {
	case VM_SNAPSHOT_SAVE:
		ret = svm_getreg(arg, vcpu, ident, &val);
		if (ret != 0)
			break;
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		break;
	case VM_SNAPSHOT_RESTORE:
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		ret = svm_setreg(arg, vcpu, ident, val);
		break;
	default:
		ret = EINVAL;
		break;
	}

done:
	return (ret);
}
#endif
static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
@ -2285,6 +2337,306 @@ svm_vlapic_cleanup(void *arg, struct vlapic *vlapic)
free(vlapic, M_SVM_VLAPIC);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Snapshot the SVM per-VM state: the softc's nptp field followed by, for
 * each of the VM_MAXCPU vCPU slots, selected VMCB control/state fields,
 * the software-saved register context and bookkeeping fields of
 * struct svm_vcpu.
 *
 * Every field goes through SNAPSHOT_VAR_OR_LEAVE/SNAPSHOT_BUF_OR_LEAVE, so
 * the order of the statements below defines the layout of the snapshot
 * data; do not reorder them.  The first failing field aborts the whole
 * operation and its error code is returned.
 *
 * When meta->op is VM_SNAPSHOT_RESTORE, the listed VMCB cache bits of each
 * vCPU are marked dirty and flush_by_asid() is called once at the end.
 */
static int
svm_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
{
/* struct svm_softc is AMD's representation for SVM softc */
struct svm_softc *sc;
struct svm_vcpu *vcpu;
struct vmcb *vmcb;
uint64_t val;
int i;
int ret;
sc = arg;
KASSERT(sc != NULL, ("%s: arg was NULL", __func__));
SNAPSHOT_VAR_OR_LEAVE(sc->nptp, meta, ret, done);
for (i = 0; i < VM_MAXCPU; i++) {
vcpu = &sc->vcpu[i];
vmcb = &vcpu->vmcb;
/* VMCB fields for virtual cpu i */
/*
 * NOTE(review): v_tpr is recorded twice, once directly and once
 * through 'val'.  This looks redundant, but removing either copy
 * would change the snapshot layout.
 */
SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.v_tpr, meta, ret, done);
/*
 * Fields below that are copied through 'val' are staged in a 64-bit
 * temporary and assigned back afterwards, instead of being passed to
 * the macro directly.
 */
val = vmcb->ctrl.v_tpr;
SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
vmcb->ctrl.v_tpr = val;
SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.asid, meta, ret, done);
val = vmcb->ctrl.np_enable;
SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
vmcb->ctrl.np_enable = val;
val = vmcb->ctrl.intr_shadow;
SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
vmcb->ctrl.intr_shadow = val;
SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.tlb_ctrl, meta, ret, done);
/* VMCB state-save area, padding regions included */
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad1,
sizeof(vmcb->state.pad1),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cpl, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad2,
sizeof(vmcb->state.pad2),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.efer, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad3,
sizeof(vmcb->state.pad3),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr4, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr3, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr0, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr7, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr6, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rflags, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rip, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad4,
sizeof(vmcb->state.pad4),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rsp, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad5,
sizeof(vmcb->state.pad5),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rax, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.star, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.lstar, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cstar, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sfmask, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.kernelgsbase,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_cs, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_esp,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_eip,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr2, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad6,
sizeof(vmcb->state.pad6),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.g_pat, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dbgctl, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_from, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_to, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_from, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_to, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad7,
sizeof(vmcb->state.pad7),
meta, ret, done);
/* Snapshot swctx for virtual cpu i */
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr0, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr1, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr2, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr3, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr6, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr7, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_debugctl, meta, ret,
done);
/* Restore other svm_vcpu struct fields */
/* Restore NEXTRIP field */
SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
/* Restore lastcpu field */
SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, ret, done);
/* Restore EPTGEN field - EPT is Extended Page Table */
SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, ret, done);
/* Set all caches dirty */
if (meta->op == VM_SNAPSHOT_RESTORE) {
svm_set_dirty(sc, i, VMCB_CACHE_ASID);
svm_set_dirty(sc, i, VMCB_CACHE_IOPM);
svm_set_dirty(sc, i, VMCB_CACHE_I);
svm_set_dirty(sc, i, VMCB_CACHE_TPR);
svm_set_dirty(sc, i, VMCB_CACHE_CR2);
svm_set_dirty(sc, i, VMCB_CACHE_CR);
svm_set_dirty(sc, i, VMCB_CACHE_DT);
svm_set_dirty(sc, i, VMCB_CACHE_SEG);
svm_set_dirty(sc, i, VMCB_CACHE_NP);
}
}
if (meta->op == VM_SNAPSHOT_RESTORE)
flush_by_asid();
/* reached with ret == 0 when every field was processed */
done:
return (ret);
}
static int
svm_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
{
struct vmcb *vmcb;
struct svm_softc *sc;
int err, running, hostcpu;
sc = (struct svm_softc *)arg;
err = 0;
KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
vmcb = svm_get_vmcb(sc, vcpu);
running = vcpu_is_running(sc->vm, vcpu, &hostcpu);
if (running && hostcpu !=curcpu) {
printf("%s: %s%d is running", __func__, vm_name(sc->vm), vcpu);
return (EINVAL);
}
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR0, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR2, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR3, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR4, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DR7, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RAX, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RSP, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RIP, meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RFLAGS, meta);
/* Guest segments */
/* ES */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_ES, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_ES, meta);
/* CS */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CS, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_CS, meta);
/* SS */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_SS, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_SS, meta);
/* DS */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DS, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_DS, meta);
/* FS */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_FS, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_FS, meta);
/* GS */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_GS, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GS, meta);
/* TR */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_TR, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_TR, meta);
/* LDTR */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_LDTR, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_LDTR, meta);
/* EFER */
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_EFER, meta);
/* IDTR and GDTR */
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_IDTR, meta);
err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GDTR, meta);
/* Specific AMD registers */
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_SYSENTER_CS, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_SYSENTER_ESP, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_SYSENTER_EIP, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_NPT_BASE, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_CR_INTERCEPT, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_DR_INTERCEPT, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_EXC_INTERCEPT, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_INST1_INTERCEPT, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_INST2_INTERCEPT, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_TLB_CTRL, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_EXITINFO1, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_EXITINFO2, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_EXITINTINFO, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_VIRQ, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_GUEST_PAT, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_AVIC_BAR, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_AVIC_PAGE, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_AVIC_LT, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_AVIC_PT, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_IO_PERM, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_MSR_PERM, 8), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_ASID, 4), meta);
err += vmcb_snapshot_any(sc, vcpu,
VMCB_ACCESS(VMCB_OFF_EXIT_REASON, 8), meta);
err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_INTR_SHADOW, meta);
return (err);
}
/*
 * vmm_ops hook: apply a TSC offset to 'vcpu' by delegating to
 * svm_set_tsc_offset() and returning its status.
 */
static int
svm_restore_tsc(void *arg, int vcpu, uint64_t offset)
{

	return (svm_set_tsc_offset(arg, vcpu, offset));
}
#endif
struct vmm_ops vmm_ops_amd = {
.init = svm_init,
.cleanup = svm_cleanup,
@ -2302,4 +2654,9 @@ struct vmm_ops vmm_ops_amd = {
.vmspace_free = svm_npt_free,
.vlapic_init = svm_vlapic_init,
.vlapic_cleanup = svm_vlapic_cleanup,
#ifdef BHYVE_SNAPSHOT
.vmsnapshot = svm_snapshot_vmi,
.vmcx_snapshot = svm_snapshot_vmcx,
.vm_restore_tsc = svm_restore_tsc,
#endif
};

View file

@ -32,6 +32,7 @@
#define _SVM_H_
struct pcpu;
struct svm_softc;
/*
* Guest register state that is saved outside the VMCB.
@ -66,5 +67,8 @@ struct svm_regctx {
};
void svm_launch(uint64_t pa, struct svm_regctx *gctx, struct pcpu *pcpu);
#ifdef BHYVE_SNAPSHOT
int svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset);
#endif
#endif /* _SVM_H_ */

View file

@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
@ -162,6 +164,11 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu)
* Ignore writes to microcode update register.
*/
break;
#ifdef BHYVE_SNAPSHOT
case MSR_TSC:
error = svm_set_tsc_offset(sc, vcpu, val - rdtsc());
break;
#endif
case MSR_EXTFEATURES:
break;
default:

View file

@ -29,12 +29,15 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
@ -452,3 +455,106 @@ vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
return (0);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Read the register or VMCB field identified by 'ident' for 'vcpu'
 * through vm_get_register().
 *
 * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or when 'ident' is
 * not below VM_REG_LAST; otherwise returns vm_get_register()'s status.
 */
int
vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU || ident >= VM_REG_LAST)
		return (EINVAL);

	return (vm_get_register(sc->vm, vcpu, ident, val));
}
/*
 * Write 'val' to the register or VMCB field identified by 'ident' for
 * 'vcpu' through vm_set_register().
 *
 * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or when 'ident' is
 * not below VM_REG_LAST; otherwise returns vm_set_register()'s status.
 */
int
vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU || ident >= VM_REG_LAST)
		return (EINVAL);

	return (vm_set_register(sc->vm, vcpu, ident, val));
}
/*
 * Save or restore one segment descriptor (base, limit, access -- in that
 * order) for register 'reg' of 'vcpu'.
 *
 * On save the descriptor is fetched with vmcb_getdesc() before being
 * handed to the snapshot buffer; on restore the three fields are taken
 * from the snapshot buffer and installed with vmcb_setdesc().  An unknown
 * operation yields EINVAL.  Returns 0 or the first error encountered.
 */
int
vmcb_snapshot_desc(void *arg, int vcpu, int reg, struct vm_snapshot_meta *meta)
{
	struct seg_desc desc;
	int ret;

	switch (meta->op) {
	case VM_SNAPSHOT_SAVE:
		ret = vmcb_getdesc(arg, vcpu, reg, &desc);
		if (ret != 0)
			break;
		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
		break;
	case VM_SNAPSHOT_RESTORE:
		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
		ret = vmcb_setdesc(arg, vcpu, reg, &desc);
		break;
	default:
		ret = EINVAL;
		break;
	}

done:
	return (ret);
}
/*
 * Save or restore a single 64-bit value identified by 'ident', using
 * vmcb_getany()/vmcb_setany() for the hardware side.
 *
 * An unknown operation yields EINVAL.  Returns 0 or the first error
 * encountered.
 */
int
vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
    struct vm_snapshot_meta *meta)
{
	uint64_t val;
	int ret;

	switch (meta->op) {
	case VM_SNAPSHOT_SAVE:
		ret = vmcb_getany(sc, vcpu, ident, &val);
		if (ret != 0)
			break;
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		break;
	case VM_SNAPSHOT_RESTORE:
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		ret = vmcb_setany(sc, vcpu, ident, val);
		break;
	default:
		ret = EINVAL;
		break;
	}

done:
	return (ret);
}
#endif

View file

@ -31,8 +31,6 @@
#ifndef _VMCB_H_
#define _VMCB_H_
struct svm_softc;
/* 64-bit mask with only bit 'n' set; 'n' may be any integer expression. */
#define BIT(n) (1ULL << (n))
/*
@ -209,6 +207,10 @@ struct svm_softc;
#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF)
#ifdef _KERNEL
struct svm_softc;
struct vm_snapshot_meta;
/* VMCB save state area segment format */
struct vmcb_segment {
uint16_t selector;
@ -331,6 +333,14 @@ int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg);
#ifdef BHYVE_SNAPSHOT
int vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val);
int vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
int vmcb_snapshot_desc(void *arg, int vcpu, int reg,
struct vm_snapshot_meta *meta);
int vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
struct vm_snapshot_meta *meta);
#endif
#endif /* _KERNEL */
#endif /* _VMCB_H_ */

View file

@ -28,6 +28,7 @@
* $FreeBSD$
*/
#include "opt_bhyve_snapshot.h"
#include "opt_ddb.h"
#include <sys/cdefs.h>
@ -43,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <machine/segments.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vmm_host.h"
#include "vmx_cpufunc.h"
#include "vmcs.h"
@ -430,6 +432,128 @@ vmcs_init(struct vmcs *vmcs)
return (error);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Read VMCS field 'ident' into '*val'.
 *
 * When 'running' is zero the read is bracketed by VMPTRLD and VMCLEAR on
 * 'vmcs'; when it is non-zero vmread() is issued directly.  Returns
 * vmread()'s status.
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int error;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	error = vmread(ident, val);
	VMCLEAR(vmcs);

	return (error);
}
/*
 * Write 'val' to VMCS field 'ident'.
 *
 * When 'running' is zero the write is bracketed by VMPTRLD and VMCLEAR on
 * 'vmcs'; when it is non-zero vmwrite() is issued directly.  Returns
 * vmwrite()'s status.
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int error;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	error = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (error);
}
/*
 * Save or restore one guest register identified by 'ident', using
 * vmcs_getreg()/vmcs_setreg() for the VMCS side.
 *
 * An unknown operation yields EINVAL.  Returns 0 or the first error
 * encountered.
 */
int
vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
    struct vm_snapshot_meta *meta)
{
	uint64_t val;
	int ret;

	switch (meta->op) {
	case VM_SNAPSHOT_SAVE:
		ret = vmcs_getreg(vmcs, running, ident, &val);
		if (ret != 0)
			break;
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		break;
	case VM_SNAPSHOT_RESTORE:
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		ret = vmcs_setreg(vmcs, running, ident, val);
		break;
	default:
		ret = EINVAL;
		break;
	}

done:
	return (ret);
}
/*
 * Save or restore one segment descriptor (base, limit, access -- in that
 * order) for segment 'seg', using vmcs_getdesc()/vmcs_setdesc() for the
 * VMCS side.
 *
 * An unknown operation yields EINVAL.  Returns 0 or the first error
 * encountered.
 */
int
vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
    struct vm_snapshot_meta *meta)
{
	struct seg_desc desc;
	int ret;

	switch (meta->op) {
	case VM_SNAPSHOT_SAVE:
		ret = vmcs_getdesc(vmcs, running, seg, &desc);
		if (ret != 0)
			break;
		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
		break;
	case VM_SNAPSHOT_RESTORE:
		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
		ret = vmcs_setdesc(vmcs, running, seg, &desc);
		break;
	default:
		ret = EINVAL;
		break;
	}

done:
	return (ret);
}
/*
 * Save or restore one raw VMCS field (a VMCS_* encoding) through the
 * snapshot buffer described by 'meta'.
 *
 * Mirrors vmcs_snapshot_reg() but goes through vmcs_getany()/vmcs_setany()
 * so that 'ident' is used as a VMCS field encoding directly.  Any operation
 * other than save or restore yields EINVAL.
 */
int
vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
    struct vm_snapshot_meta *meta)
{
	uint64_t val;
	int ret;

	switch (meta->op) {
	case VM_SNAPSHOT_SAVE:
		ret = vmcs_getany(vmcs, running, ident, &val);
		if (ret == 0) {
			SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		}
		break;
	case VM_SNAPSHOT_RESTORE:
		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
		ret = vmcs_setany(vmcs, running, ident, val);
		break;
	default:
		ret = EINVAL;
		break;
	}

done:
	return (ret);
}
#endif
#ifdef DDB
extern int vmxon_enabled[];

View file

@ -32,6 +32,9 @@
#define _VMCS_H_
#ifdef _KERNEL
struct vm_snapshot_meta;
struct vmcs {
uint32_t identifier;
uint32_t abort_code;
@ -55,6 +58,16 @@ int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
#ifdef BHYVE_SNAPSHOT
int vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val);
int vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val);
int vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
struct vm_snapshot_meta *meta);
int vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
struct vm_snapshot_meta *meta);
int vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
struct vm_snapshot_meta *meta);
#endif
/*
* Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h

View file

@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
@ -56,6 +58,8 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
@ -295,6 +299,9 @@ static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
#ifdef BHYVE_SNAPSHOT
static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now);
#endif
#ifdef KTR
static const char *
@ -1299,7 +1306,10 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
}
error = vmwrite(VMCS_TSC_OFFSET, offset);
#ifdef BHYVE_SNAPSHOT
if (error == 0)
error = vm_set_tsc_offset(vmx->vm, vcpu, offset);
#endif
return (error);
}
@ -3876,6 +3886,153 @@ vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic)
free(vlapic, M_VLAPIC);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the software-maintained VMX state of every vCPU slot:
 * the guest MSR array followed by the general purpose, CR2 and debug
 * registers kept in 'struct vmxctx'.
 *
 * 'arg' is the 'struct vmx' cookie.  The same statement sequence serves both
 * directions, so the order of the SNAPSHOT_* invocations below defines the
 * layout of the data stream and must not be reordered.
 *
 * NOTE(review): SNAPSHOT_BUF_OR_LEAVE/SNAPSHOT_VAR_OR_LEAVE are defined
 * outside this file; presumably they store the result in 'ret' and jump to
 * 'done' on failure -- confirm against <machine/vmm_snapshot.h>.
 */
static int
vmx_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
{
struct vmx *vmx;
struct vmxctx *vmxctx;
int i;
int ret;
vmx = arg;
KASSERT(vmx != NULL, ("%s: arg was NULL", __func__));
/* All VM_MAXCPU slots are processed, one after the other. */
for (i = 0; i < VM_MAXCPU; i++) {
/* Per-vCPU guest MSR save area, copied as one opaque buffer. */
SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i],
sizeof(vmx->guest_msrs[i]), meta, ret, done);
vmxctx = &vmx->ctx[i];
/* Guest register context held in software (struct vmxctx). */
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, ret, done);
}
done:
return (ret);
}
static int
vmx_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
{
struct vmcs *vmcs;
struct vmx *vmx;
int err, run, hostcpu;
vmx = (struct vmx *)arg;
err = 0;
KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
vmcs = &vmx->vmcs[vcpu];
run = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (run && hostcpu != curcpu) {
printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu);
return (EINVAL);
}
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR0, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR3, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR4, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DR7, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RSP, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RIP, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RFLAGS, meta);
/* Guest segments */
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_ES, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_ES, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CS, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_CS, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_SS, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_SS, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DS, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_DS, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_FS, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_FS, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_GS, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GS, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_TR, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_TR, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_LDTR, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_LDTR, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_EFER, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_IDTR, meta);
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GDTR, meta);
/* Guest page tables */
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE0, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE1, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE2, meta);
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE3, meta);
/* Other guest state */
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_CS, meta);
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_ESP, meta);
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_EIP, meta);
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_INTERRUPTIBILITY, meta);
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_ACTIVITY, meta);
err += vmcs_snapshot_any(vmcs, run, VMCS_ENTRY_CTLS, meta);
err += vmcs_snapshot_any(vmcs, run, VMCS_EXIT_CTLS, meta);
return (err);
}
/*
 * Program 'offset' as the TSC offset of 'vcpu'.
 *
 * 'arg' is the 'struct vmx' cookie.  The request is refused with EINVAL if
 * the vCPU is running on a host CPU other than the calling one.  When the
 * vCPU is not running, its VMCS is loaded around the update and cleared
 * afterwards.  Returns the result of vmx_set_tsc_offset().
 */
static int
vmx_restore_tsc(void *arg, int vcpu, uint64_t offset)
{
	struct vmx *vmx = (struct vmx *)arg;
	struct vmcs *vmcs;
	int hostcpu, is_running, rc;

	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
	vmcs = &vmx->vmcs[vcpu];

	is_running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
	if (is_running && hostcpu != curcpu) {
		printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu);
		return (EINVAL);
	}

	if (is_running) {
		rc = vmx_set_tsc_offset(vmx, vcpu, offset);
	} else {
		VMPTRLD(vmcs);
		rc = vmx_set_tsc_offset(vmx, vcpu, offset);
		VMCLEAR(vmcs);
	}

	return (rc);
}
#endif
struct vmm_ops vmm_ops_intel = {
.init = vmx_init,
.cleanup = vmx_cleanup,
@ -3893,4 +4050,9 @@ struct vmm_ops vmm_ops_intel = {
.vmspace_free = ept_vmspace_free,
.vlapic_init = vmx_vlapic_init,
.vlapic_cleanup = vmx_vlapic_cleanup,
#ifdef BHYVE_SNAPSHOT
.vmsnapshot = vmx_snapshot_vmi,
.vmcx_snapshot = vmx_snapshot_vmcx,
.vm_restore_tsc = vmx_restore_tsc,
#endif
};

View file

@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
@ -42,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <dev/ic/i8259.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@ -808,3 +811,43 @@ vatpic_cleanup(struct vatpic *vatpic)
{
free(vatpic, M_VATPIC);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual 8259 interrupt controllers.
 *
 * Every element of vatpic->atpic[] is processed field by field, followed by
 * the vatpic->elc array.  The same sequence is used for both directions, so
 * the order of the invocations below defines the stream layout.
 *
 * NOTE(review): the SNAPSHOT_*_OR_LEAVE macros are defined elsewhere;
 * presumably they set 'ret' and jump to 'done' on failure -- confirm.
 */
int
vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta)
{
int ret;
int i;
struct atpic *atpic;
for (i = 0; i < nitems(vatpic->atpic); i++) {
atpic = &vatpic->atpic[i];
SNAPSHOT_VAR_OR_LEAVE(atpic->ready, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->icw_num, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->rd_cmd_reg, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->aeoi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->poll, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->rotate, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->sfn, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->irq_base, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->request, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->service, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->mask, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->smm, meta, ret, done);
/* acnt is an array, so it is copied as a whole buffer. */
SNAPSHOT_BUF_OR_LEAVE(atpic->acnt, sizeof(atpic->acnt),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->lowprio, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atpic->intr_raised, meta, ret, done);
}
/* State shared by all controllers comes after the per-chip state. */
SNAPSHOT_BUF_OR_LEAVE(vatpic->elc, sizeof(vatpic->elc),
meta, ret, done);
done:
return (ret);
}
#endif

View file

@ -36,6 +36,8 @@
#define IO_ELCR1 0x4d0
#define IO_ELCR2 0x4d1
struct vm_snapshot_meta;
struct vatpic *vatpic_init(struct vm *vm);
void vatpic_cleanup(struct vatpic *vatpic);
@ -54,4 +56,8 @@ int vatpic_set_irq_trigger(struct vm *vm, int irq, enum vm_intr_trigger trigger)
void vatpic_pending_intr(struct vm *vm, int *vecptr);
void vatpic_intr_accepted(struct vm *vm, int vector);
#ifdef BHYVE_SNAPSHOT
int vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta);
#endif
#endif /* _VATPIC_H_ */

View file

@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
@ -39,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vatpic.h"
@ -472,3 +475,42 @@ vatpit_cleanup(struct vatpit *vatpit)
free(vatpit, M_VATPIT);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual 8254 interval timer.
 *
 * The timer frequency is processed first, then every element of
 * vatpit->channel[] field by field.  The same sequence is used for both
 * directions, so the order below defines the stream layout.
 *
 * NOTE(review): the SNAPSHOT_*_OR_LEAVE macros are defined elsewhere;
 * presumably they set 'ret' and jump to 'done' on failure -- confirm.
 */
int
vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta)
{
int ret;
int i;
struct channel *channel;
SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_bt.sec, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_bt.frac, meta, ret, done);
/*
 * TODO: properly restore timers; they will NOT work currently.
 * Only the data fields are copied; nothing here re-arms a timer.  The
 * warning below is printed on every call, for save as well as restore.
 */
printf("%s: snapshot restore does not reset timers!\r\n", __func__);
for (i = 0; i < nitems(vatpit->channel); i++) {
channel = &vatpit->channel[i];
SNAPSHOT_VAR_OR_LEAVE(channel->mode, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->initial, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->now_bt.sec, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->now_bt.frac, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(channel->cr, sizeof(channel->cr),
meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(channel->ol, sizeof(channel->ol),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->slatched, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->status, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->crbyte, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->frbyte, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->callout_bt.sec, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(channel->callout_bt.frac, meta, ret,
done);
}
done:
return (ret);
}
#endif

View file

@ -36,6 +36,8 @@
#define NMISC_PORT 0x61
struct vm_snapshot_meta;
struct vatpit *vatpit_init(struct vm *vm);
void vatpit_cleanup(struct vatpit *vatpit);
@ -43,5 +45,8 @@ int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *eax);
int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
int bytes, uint32_t *eax);
#ifdef BHYVE_SNAPSHOT
int vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta);
#endif
#endif /* _VATPIT_H_ */

View file

@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@ -43,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vatpic.h"
@ -761,3 +764,49 @@ vhpet_getcap(struct vm_hpet_cap *cap)
cap->capabilities = vhpet_capabilities();
return (0);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual HPET.
 *
 * The global registers are processed first, then the counter value, then
 * every element of vhpet->timer[].  The same sequence is used for both
 * directions, so the order below defines the stream layout.
 *
 * NOTE(review): the SNAPSHOT_*_OR_LEAVE macros are defined elsewhere;
 * presumably they set 'ret' and jump to 'done' on failure -- confirm.
 */
int
vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
{
int i, ret;
uint32_t countbase;
SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
/*
 * At restore time the countbase should have the value it had when the
 * snapshot was created; since the value is not directly kept in
 * vhpet->countbase, but rather computed relative to the current system
 * uptime using countbase_sbt, save the value returned by vhpet_counter.
 */
if (meta->op == VM_SNAPSHOT_SAVE)
countbase = vhpet_counter(vhpet, NULL);
/* On save this emits the value just sampled; on restore it fills it in. */
SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
if (meta->op == VM_SNAPSHOT_RESTORE)
vhpet->countbase = countbase;
for (i = 0; i < nitems(vhpet->timer); i++) {
SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
meta, ret, done);
}
done:
return (ret);
}
/*
 * Resume HPET time keeping after a restore: if the counter is enabled,
 * start it counting again.  Always returns 0.
 */
int
vhpet_restore_time(struct vhpet *vhpet)
{

	if (!vhpet_counter_enabled(vhpet))
		return (0);

	vhpet_start_counting(vhpet);
	return (0);
}
#endif

View file

@ -35,6 +35,8 @@
#define VHPET_BASE 0xfed00000
#define VHPET_SIZE 1024
struct vm_snapshot_meta;
struct vhpet *vhpet_init(struct vm *vm);
void vhpet_cleanup(struct vhpet *vhpet);
int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
@ -42,5 +44,9 @@ int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val,
int size, void *arg);
int vhpet_getcap(struct vm_hpet_cap *cap);
#ifdef BHYVE_SNAPSHOT
int vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta);
int vhpet_restore_time(struct vhpet *vhpet);
#endif
#endif /* _VHPET_H_ */

View file

@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/lock.h>
@ -42,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <x86/apicreg.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@ -499,3 +502,22 @@ vioapic_pincount(struct vm *vm)
return (REDIR_ENTRIES);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual I/O APIC: the register select
 * value followed by the 'reg' and 'acnt' members of every redirection table
 * entry.  The same sequence is used for both directions, so the order below
 * defines the stream layout.
 *
 * NOTE(review): SNAPSHOT_VAR_OR_LEAVE is defined elsewhere; presumably it
 * sets 'ret' and jumps to 'done' on failure -- confirm.
 */
int
vioapic_snapshot(struct vioapic *vioapic, struct vm_snapshot_meta *meta)
{
int ret;
int i;
SNAPSHOT_VAR_OR_LEAVE(vioapic->ioregsel, meta, ret, done);
for (i = 0; i < nitems(vioapic->rtbl); i++) {
SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].reg, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].acnt, meta, ret, done);
}
done:
return (ret);
}
#endif

View file

@ -32,6 +32,8 @@
#ifndef _VIOAPIC_H_
#define _VIOAPIC_H_
struct vm_snapshot_meta;
#define VIOAPIC_BASE 0xFEC00000
#define VIOAPIC_SIZE 4096
@ -49,4 +51,9 @@ int vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa,
int vioapic_pincount(struct vm *vm);
void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector);
#ifdef BHYVE_SNAPSHOT
int vioapic_snapshot(struct vioapic *vioapic,
struct vm_snapshot_meta *meta);
#endif
#endif

View file

@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
@ -47,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_ktr.h"
@ -1650,3 +1653,106 @@ vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
vlapic_set_tmr(vlapic, vector, true);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Re-arm the local APIC timer callout after a restore.
 *
 * 'ccr' is the current-count value captured when the snapshot was taken.
 * A non-zero count schedules the callout 'ccr' ticks (at timer_freq_bt)
 * from now.  A zero count only re-arms the callout when the timer is
 * periodic, in which case a full timer period is used.  The logic follows
 * vlapic_icrtmr_write_handler().
 */
static void
vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
{
	struct bintime delta;
	bool restart_period;

	VLAPIC_TIMER_LOCK(vlapic);

	/* Time remaining until expiry: 'ccr' ticks of the timer clock. */
	delta = vlapic->timer_freq_bt;
	bintime_mul(&delta, ccr);

	restart_period = false;
	if (ccr == 0) {
		/* even if the CCR was 0, periodic timers should be reset */
		if (!vlapic_periodic_timer(vlapic)) {
			VLAPIC_TIMER_UNLOCK(vlapic);
			return;
		}
		delta = vlapic->timer_period_bt;
		restart_period = true;
	}

	binuptime(&vlapic->timer_fire_bt);
	bintime_add(&vlapic->timer_fire_bt, &delta);
	if (restart_period)
		callout_stop(&vlapic->callout);
	callout_reset_sbt(&vlapic->callout, bttosbt(delta), 0,
	    vlapic_callout_handler, vlapic, 0);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
/*
 * Save or restore the local APIC state of every vCPU slot of 'vm'.
 *
 * For each vCPU the APIC page is processed first, then the software state
 * kept in 'struct vlapic', and finally the timer's current count.  On
 * restore the timer period is recomputed and the timer callout is re-armed
 * from the saved count.  The same sequence is used for both directions, so
 * the order below defines the stream layout.
 *
 * NOTE(review): the SNAPSHOT_*_OR_LEAVE macros are defined elsewhere;
 * presumably they set 'ret' and jump to 'done' on failure -- confirm.
 */
int
vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
{
int i, ret;
struct vlapic *vlapic;
struct LAPIC *lapic;
uint32_t ccr;
KASSERT(vm != NULL, ("%s: arg was NULL", __func__));
ret = 0;
for (i = 0; i < VM_MAXCPU; i++) {
vlapic = vm_lapic(vm, i);
/* snapshot the page first; timer period depends on icr_timer */
lapic = vlapic->apic_page;
SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
meta, ret, done);
/*
 * Timer period is equal to 'icr_timer' ticks at a frequency of
 * 'timer_freq_bt'.
 */
if (meta->op == VM_SNAPSHOT_RESTORE) {
vlapic->timer_period_bt = vlapic->timer_freq_bt;
bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
}
SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
sizeof(vlapic->isrvec_stk),
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
sizeof(vlapic->lvt_last),
meta, ret, done);
/* The current count is sampled only on save; restore reads it back. */
if (meta->op == VM_SNAPSHOT_SAVE)
ccr = vlapic_get_ccr(vlapic);
SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);
if (meta->op == VM_SNAPSHOT_RESTORE) {
/*
 * Reset the value of the 'timer_fire_bt' and the vlapic
 * callout based on the value of the current count
 * register saved when the VM snapshot was created.
 */
vlapic_reset_callout(vlapic, ccr);
}
}
done:
return (ret);
}
#endif

View file

@ -32,6 +32,7 @@
#define _VLAPIC_H_
struct vm;
struct vm_snapshot_meta;
enum x2apic_state;
int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
@ -109,4 +110,9 @@ void vlapic_icrtmr_write_handler(struct vlapic *vlapic);
void vlapic_dcr_write_handler(struct vlapic *vlapic);
void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset);
void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val);
#ifdef BHYVE_SNAPSHOT
int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
#endif
#endif /* _VLAPIC_H_ */

View file

@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/kernel.h>
@ -36,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include "vpmtmr.h"
@ -103,3 +106,16 @@ vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
return (0);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual ACPI PM timer, which consists of
 * the single 'baseval' field.
 *
 * NOTE(review): SNAPSHOT_VAR_OR_LEAVE is defined elsewhere; presumably it
 * sets 'ret' and jumps to 'done' on failure -- confirm.
 */
int
vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_OR_LEAVE(vpmtmr->baseval, meta, ret, done);
done:
return (ret);
}
#endif

View file

@ -34,6 +34,7 @@
#define IO_PMTMR 0x408
struct vpmtmr;
struct vm_snapshot_meta;
struct vpmtmr *vpmtmr_init(struct vm *vm);
void vpmtmr_cleanup(struct vpmtmr *pmtmr);
@ -41,4 +42,8 @@ void vpmtmr_cleanup(struct vpmtmr *pmtmr);
int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
#ifdef BHYVE_SNAPSHOT
int vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta);
#endif
#endif

View file

@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
@ -40,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <isa/rtc.h>
@ -1020,3 +1023,45 @@ vrtc_cleanup(struct vrtc *vrtc)
callout_drain(&vrtc->callout);
free(vrtc, M_VRTC);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of the virtual RTC.
 *
 * The whole operation runs with the vRTC lock held.  On restore
 * 'base_uptime' is not taken from the snapshot but re-based to the current
 * uptime, so that 'base_rtctime' is interpreted relative to "now".  After
 * all fields were processed successfully the vRTC callout is re-armed.
 *
 * The same sequence is used for both directions, so the order of the
 * SNAPSHOT_* invocations defines the stream layout.
 *
 * Returns 0 on success or the error that aborted the operation.  The lock
 * is released on every exit path; the 'done' label used to sit after the
 * unlock, so a failed field left the vRTC locked.
 *
 * NOTE(review): the SNAPSHOT_*_OR_LEAVE macros are defined elsewhere;
 * presumably they set 'ret' and jump to 'done' on failure -- confirm.
 */
int
vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta)
{
	int ret;

	VRTC_LOCK(vrtc);

	SNAPSHOT_VAR_OR_LEAVE(vrtc->addr, meta, ret, done);
	if (meta->op == VM_SNAPSHOT_RESTORE)
		vrtc->base_uptime = sbinuptime();
	SNAPSHOT_VAR_OR_LEAVE(vrtc->base_rtctime, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.sec, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_sec, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.min, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_min, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.hour, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_hour, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_week, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_month, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.month, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.year, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_a, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_b, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_c, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_d, meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram, sizeof(vrtc->rtcdev.nvram),
	    meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.century, meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram2, sizeof(vrtc->rtcdev.nvram2),
	    meta, ret, done);

	/* Only reached when every field was processed without error. */
	vrtc_callout_reset(vrtc, vrtc_freq(vrtc));

done:
	/* Shared exit: error paths must drop the lock as well. */
	VRTC_UNLOCK(vrtc);
	return (ret);
}
#endif

View file

@ -34,6 +34,7 @@
#include <isa/isareg.h>
struct vrtc;
struct vm_snapshot_meta;
struct vrtc *vrtc_init(struct vm *vm);
void vrtc_cleanup(struct vrtc *vrtc);
@ -49,4 +50,8 @@ int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
#ifdef BHYVE_SNAPSHOT
int vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta);
#endif
#endif

View file

@ -31,6 +31,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@ -44,7 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@ -53,6 +55,11 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
@ -64,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@ -111,6 +119,7 @@ struct vcpu {
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
uint64_t tsc_offset; /* (o) TSC offsetting */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@ -204,6 +213,14 @@ static struct vmm_ops *ops;
(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define VLAPIC_CLEANUP(vmi, vlapic) \
(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
#ifdef BHYVE_SNAPSHOT
/*
 * Dispatch wrappers for the snapshot-related entries of the backend 'ops'
 * table.  Each one evaluates to ENXIO when no backend is registered
 * (ops == NULL) and otherwise to the return value of the backend hook.
 */
#define VM_SNAPSHOT_VMI(vmi, meta) \
(ops != NULL ? (*ops->vmsnapshot)(vmi, meta) : ENXIO)
#define VM_SNAPSHOT_VMCX(vmi, meta, vcpuid) \
(ops != NULL ? (*ops->vmcx_snapshot)(vmi, meta, vcpuid) : ENXIO)
#define VM_RESTORE_TSC(vmi, vcpuid, offset) \
(ops != NULL ? (*ops->vm_restore_tsc)(vmi, vcpuid, offset) : ENXIO)
#endif
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating() clts()
@ -290,6 +307,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->hostcpu = NOCPU;
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
vcpu->tsc_offset = 0;
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
@ -2730,3 +2748,177 @@ vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the machine-independent per-vCPU state kept in
 * 'struct vcpu' for every vCPU slot of 'vm'.  The same sequence is used for
 * both directions, so the order below defines the stream layout.
 *
 * NOTE(review): SNAPSHOT_VAR_OR_LEAVE is defined elsewhere; presumably it
 * sets 'ret' and jumps to 'done' on failure -- confirm.
 */
static int
vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta)
{
int ret;
int i;
struct vcpu *vcpu;
for (i = 0; i < VM_MAXCPU; i++) {
vcpu = &vm->vcpu[i];
SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
/*
 * XXX we're cheating here, since the value of tsc_offset as
 * saved here is actually the value of the guest's TSC.
 *
 * It will be turned back into an actual offset when the
 * TSC restore function is called.
 */
SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done);
}
done:
return (ret);
}
static int
vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta)
{
int ret;
int i;
uint64_t now;
ret = 0;
now = rdtsc();
if (meta->op == VM_SNAPSHOT_SAVE) {
/* XXX make tsc_offset take the value TSC proper as seen by the
* guest
*/
for (i = 0; i < VM_MAXCPU; i++)
vm->vcpu[i].tsc_offset += now;
}
ret = vm_snapshot_vcpus(vm, meta);
if (ret != 0) {
printf("%s: failed to copy vm data to user buffer", __func__);
goto done;
}
if (meta->op == VM_SNAPSHOT_SAVE) {
/* XXX turn tsc_offset back into an offset; actual value is only
* required for restore; using it otherwise would be wrong
*/
for (i = 0; i < VM_MAXCPU; i++)
vm->vcpu[i].tsc_offset -= now;
}
done:
return (ret);
}
/*
 * Save or restore the backend-specific (VMCS/VMCB) state of every vCPU slot
 * by calling the backend's vmcx_snapshot hook once per vCPU.  Stops at, logs
 * and returns the first error; returns 0 when all vCPUs succeed.
 */
static int
vm_snapshot_vmcx(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int cpu, rc;

	for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
		rc = VM_SNAPSHOT_VMCX(vm->cookie, meta, cpu);
		if (rc == 0)
			continue;

		printf("%s: failed to snapshot vmcs/vmcb data for "
		    "vCPU: %d; error: %d\n", __func__, cpu, rc);
		return (rc);
	}

	return (0);
}
/*
 * Entry point for snapshot requests: save kernel-side structures to
 * user space, or restore them from it, depending on 'meta'.
 *
 * 'meta->dev_req' selects which component is processed; the request is
 * forwarded to that component's snapshot routine and its result returned.
 * An unknown component is logged and rejected with EINVAL.
 */
int
vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta)
{

	switch (meta->dev_req) {
	case STRUCT_VMX:
		return (VM_SNAPSHOT_VMI(vm->cookie, meta));
	case STRUCT_VMCX:
		return (vm_snapshot_vmcx(vm, meta));
	case STRUCT_VM:
		return (vm_snapshot_vm(vm, meta));
	case STRUCT_VIOAPIC:
		return (vioapic_snapshot(vm_ioapic(vm), meta));
	case STRUCT_VLAPIC:
		return (vlapic_snapshot(vm, meta));
	case STRUCT_VHPET:
		return (vhpet_snapshot(vm_hpet(vm), meta));
	case STRUCT_VATPIC:
		return (vatpic_snapshot(vm_atpic(vm), meta));
	case STRUCT_VATPIT:
		return (vatpit_snapshot(vm_atpit(vm), meta));
	case STRUCT_VPMTMR:
		return (vpmtmr_snapshot(vm_pmtmr(vm), meta));
	case STRUCT_VRTC:
		return (vrtc_snapshot(vm_rtc(vm), meta));
	default:
		break;
	}

	printf("%s: failed to find the requested type %#x\n",
	    __func__, meta->dev_req);
	return (EINVAL);
}
/*
 * Record 'offset' as the TSC offset of vCPU 'vcpuid'.
 *
 * Returns EINVAL when 'vcpuid' is outside [0, VM_MAXCPU), 0 otherwise.
 */
int
vm_set_tsc_offset(struct vm *vm, int vcpuid, uint64_t offset)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vm->vcpu[vcpuid].tsc_offset = offset;
	return (0);
}
/*
 * Re-base the guest's time sources after a restore.
 *
 * The host TSC is sampled first, then the HPET is restarted, and finally
 * each vCPU's TSC offset is reprogrammed as (saved tsc_offset - sampled
 * TSC) through the backend's vm_restore_tsc hook.  Returns 0 on success or
 * the first error encountered.
 */
int
vm_restore_time(struct vm *vm)
{
	uint64_t tsc_now;
	int cpu, rc;

	tsc_now = rdtsc();

	rc = vhpet_restore_time(vm_hpet(vm));
	if (rc != 0)
		return (rc);

	for (cpu = 0; cpu < nitems(vm->vcpu); cpu++) {
		rc = VM_RESTORE_TSC(vm->cookie, cpu,
		    vm->vcpu[cpu].tsc_offset - tsc_now);
		if (rc != 0)
			return (rc);
	}

	return (0);
}
#endif

View file

@ -31,6 +31,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bhyve_snapshot.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
@ -53,8 +55,9 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@ -381,6 +384,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_cpu_topology *topology;
uint64_t *regvals;
int *regnums;
#ifdef BHYVE_SNAPSHOT
struct vm_snapshot_meta *snapshot_meta;
#endif
error = vmm_priv_check(curthread->td_ucred);
if (error)
@ -784,6 +790,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
&topology->threads, &topology->maxcpus);
error = 0;
break;
#ifdef BHYVE_SNAPSHOT
case VM_SNAPSHOT_REQ:
snapshot_meta = (struct vm_snapshot_meta *)data;
error = vm_snapshot_req(sc->vm, snapshot_meta);
break;
case VM_RESTORE_TIME:
error = vm_restore_time(sc->vm);
break;
#endif
default:
error = ENOTTY;
break;

View file

@ -0,0 +1,141 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2016 Flavius Anton
* Copyright (c) 2016 Mihai Tiganus
* Copyright (c) 2016-2019 Mihai Carabas
* Copyright (c) 2017-2019 Darius Mihai
* Copyright (c) 2017-2019 Elena Mihailescu
* Copyright (c) 2018-2019 Sergiu Weisz
* All rights reserved.
* The bhyve-snapshot feature was developed under sponsorships
* from Matthew Grooms.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/systm.h>
#include <machine/vmm_snapshot.h>
/*
 * Print a diagnostic saying that a snapshot operation failed for the buffer
 * called 'bufname'.  'op' selects the wording ("save" or "restore"); any
 * other value is reported as "unknown".
 */
void
vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
{
	const char *verb;

	switch (op) {
	case VM_SNAPSHOT_SAVE:
		verb = "save";
		break;
	case VM_SNAPSHOT_RESTORE:
		verb = "restore";
		break;
	default:
		verb = "unknown";
		break;
	}

	printf("%s: snapshot-%s failed for %s\r\n", __func__, verb, bufname);
}
int
vm_snapshot_buf(volatile void *data, size_t data_size,
struct vm_snapshot_meta *meta)
{
struct vm_snapshot_buffer *buffer;
int op;
void *nv_data;
nv_data = __DEVOLATILE(void *, data);
buffer = &meta->buffer;
op = meta->op;
if (buffer->buf_rem < data_size) {
printf("%s: buffer too small\r\n", __func__);
return (E2BIG);
}
if (op == VM_SNAPSHOT_SAVE)
copyout(nv_data, buffer->buf, data_size);
else if (op == VM_SNAPSHOT_RESTORE)
copyin(buffer->buf, nv_data, data_size);
else
return (EINVAL);
buffer->buf += data_size;
buffer->buf_rem -= data_size;
return (0);
}
/*
 * Return the number of bytes of the snapshot buffer that have been used so
 * far, computed as buf_size - buf_rem.  If buf_rem is larger than buf_size
 * the bookkeeping is inconsistent: a diagnostic is printed and 0 is returned.
 */
size_t
vm_get_snapshot_size(struct vm_snapshot_meta *meta)
{
	struct vm_snapshot_buffer *sbuf;

	sbuf = &meta->buffer;

	if (sbuf->buf_rem > sbuf->buf_size) {
		printf("%s: Invalid buffer: size = %zu, rem = %zu\r\n",
		    __func__, sbuf->buf_size, sbuf->buf_rem);
		return (0);
	}

	return (sbuf->buf_size - sbuf->buf_rem);
}
int
vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
struct vm_snapshot_meta *meta)
{
struct vm_snapshot_buffer *buffer;
int op;
int ret;
void *_data = *(void **)(void *)&data;
buffer = &meta->buffer;
op = meta->op;
if (buffer->buf_rem < data_size) {
printf("%s: buffer too small\r\n", __func__);
ret = E2BIG;
goto done;
}
if (op == VM_SNAPSHOT_SAVE) {
ret = 0;
copyout(_data, buffer->buf, data_size);
} else if (op == VM_SNAPSHOT_RESTORE) {
ret = memcmp(_data, buffer->buf, data_size);
} else {
ret = EINVAL;
goto done;
}
buffer->buf += data_size;
buffer->buf_rem -= data_size;
done:
return (ret);
}

View file

@ -15,6 +15,10 @@ opt_global.h:
@echo "#define MAC 1" >> ${.TARGET}
@echo "#define VIMAGE 1" >> ${.TARGET}
.endif
.if ${MK_BHYVE_SNAPSHOT} != "no"
opt_bhyve_snapshot.h:
@echo "#define BHYVE_SNAPSHOT 1" > ${.TARGET}
.endif
opt_bpf.h:
echo "#define DEV_BPF 1" > ${.TARGET}
.if ${MK_INET_SUPPORT} != "no"
@ -45,6 +49,9 @@ KERN_OPTS.powerpc=NEW_PCIB DEV_PCI
KERN_OPTS=MROUTING IEEE80211_DEBUG \
IEEE80211_SUPPORT_MESH DEV_BPF \
${KERN_OPTS.${MACHINE}} ${KERN_OPTS_EXTRA}
.if ${MK_BHYVE_SNAPSHOT} != "no"
KERN_OPTS+= BHYVE_SNAPSHOT
.endif
.if ${MK_INET_SUPPORT} != "no"
KERN_OPTS+= INET TCP_OFFLOAD
.endif

View file

@ -49,6 +49,7 @@ __DEFAULT_YES_OPTIONS = \
ZFS
__DEFAULT_NO_OPTIONS = \
BHYVE_SNAPSHOT \
EXTRA_TCP_STACKS \
KERNEL_RETPOLINE \
OFED \

View file

@ -3,6 +3,7 @@
AUTO_EOI_1 opt_auto_eoi.h
AUTO_EOI_2 opt_auto_eoi.h
BHYVE_SNAPSHOT
COUNT_XINVLTLB_HITS opt_smp.h
COUNT_IPIS opt_smp.h
MAXMEM

View file

@ -1,8 +1,11 @@
# $FreeBSD$
.include <kmod.opts.mk>
KMOD= vmm
SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
DPSRCS+= vmx_assym.h svm_assym.h
DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
@ -55,6 +58,10 @@ SRCS+= vmcb.c \
amdvi_hw.c \
svm_msr.c
.if ${KERN_OPTS:MBHYVE_SNAPSHOT} != ""
SRCS+= vmm_snapshot.c
.endif
CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h

View file

@ -0,0 +1,7 @@
.\" $FreeBSD$
Set to include support for save and restore (snapshots) in
.Xr bhyve 8
and
.Xr bhyvectl 8 .
.Pp
This option only affects amd64/amd64.

View file

@ -72,10 +72,17 @@ SRCS= \
spinup_ap.c \
iov.c
.if ${MK_BHYVE_SNAPSHOT} != "no"
SRCS+= snapshot.c
.endif
.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
LIBADD= vmmapi md pthread z util sbuf cam
.if ${MK_BHYVE_SNAPSHOT} != "no"
LIBADD+= ucl xo
.endif
.if ${MK_INET_SUPPORT} != "no"
CFLAGS+=-DINET
@ -92,6 +99,14 @@ LIBADD+= crypto
CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
.if ${MK_BHYVE_SNAPSHOT} != "no"
CFLAGS+= -I${SRCTOP}/contrib/libucl/include
# Temporarily disable capsicum until the checkpoint code is integrated with it.
CFLAGS+= -DWITHOUT_CAPSICUM
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
.ifdef GDB_LOG
CFLAGS+=-DGDB_LOG

View file

@ -13,8 +13,10 @@ DIRDEPS = \
lib/libcompiler_rt \
lib/libsbuf \
lib/libthr \
lib/libucl \
lib/libutil \
lib/libvmmapi \
lib/libxo \
lib/libz \

View file

@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
@ -137,6 +138,10 @@ struct atkbdc_softc {
struct aux_dev aux;
};
#ifdef BHYVE_SNAPSHOT
static struct atkbdc_softc *atkbdc_sc = NULL;
#endif
static void
atkbdc_assert_kbd_intr(struct atkbdc_softc *sc)
{
@ -548,8 +553,49 @@ atkbdc_init(struct vmctx *ctx)
sc->ps2kbd_sc = ps2kbd_init(sc);
sc->ps2mouse_sc = ps2mouse_init(sc);
#ifdef BHYVE_SNAPSHOT
assert(atkbdc_sc == NULL);
atkbdc_sc = sc;
#endif
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the AT keyboard controller state through 'meta'.
 *
 * Works on the single controller instance recorded in the file-scope
 * 'atkbdc_sc' pointer (set by atkbdc_init()), then chains into
 * ps2kbd_snapshot() and ps2mouse_snapshot() for the attached PS/2 devices.
 *
 * The SNAPSHOT_*_OR_LEAVE macros are defined outside this file; presumably
 * each one transfers the named object in the direction given by meta->op,
 * stores the status in 'ret' and jumps to 'done' on failure -- confirm
 * against <machine/vmm_snapshot.h>.  The sequence of calls presumably fixes
 * the layout of this device's state in the snapshot, so do not reorder it.
 *
 * Returns the value left in 'ret' by the last step that ran.
 */
int
atkbdc_snapshot(struct vm_snapshot_meta *meta)
{
int ret;
/* Controller-level registers, RAM and command state. */
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->status, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->outport, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->ram,
sizeof(atkbdc_sc->ram), meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->curcmd, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->ctrlbyte, meta, ret, done);
/*
 * NOTE(review): the whole 'kbd' member is handled here and its individual
 * fields are handled again right below; this looks redundant, but removing
 * either would change the snapshot layout -- confirm before touching.
 */
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd, meta, ret, done);
/* Keyboard port: interrupt state and the output ring (buffer + indices). */
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq_active, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->kbd.buffer,
sizeof(atkbdc_sc->kbd.buffer), meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.brd, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bwr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bcnt, meta, ret, done);
/* Auxiliary port: only the interrupt state is recorded here. */
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq_active, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq, meta, ret, done);
/* Device models behind the controller; stop at the first failure. */
ret = ps2kbd_snapshot(atkbdc_sc->ps2kbd_sc, meta);
if (ret != 0)
goto done;
ret = ps2mouse_snapshot(atkbdc_sc->ps2mouse_sc, meta);
done:
return (ret);
}
#endif
static void
atkbdc_dsdt(void)
{

View file

@ -30,9 +30,14 @@
#define _ATKBDC_H_
struct atkbdc_softc;
struct vm_snapshot_meta;
struct vmctx;
void atkbdc_init(struct vmctx *ctx);
void atkbdc_event(struct atkbdc_softc *sc, int iskbd);
#ifdef BHYVE_SNAPSHOT
int atkbdc_snapshot(struct vm_snapshot_meta *meta);
#endif
#endif /* _ATKBDC_H_ */

View file

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd April 22, 2020
.Dd May 04, 2020
.Dt BHYVE 8
.Os
.Sh NAME
@ -61,6 +61,7 @@
.Sm on
.Oc
.Op Fl p Ar vcpu Ns Cm \&: Ns Ar hostcpu
.Op Fl r Ar file
.Oo Fl s
.Sm off
.Cm help | Ar slot Cm \&, Ar emulation Op Cm \&, Ar conf
@ -191,6 +192,21 @@ to
.Em hostcpu .
.It Fl P
Force the guest virtual CPU to exit when a PAUSE instruction is detected.
.It Fl r Ar file
Resume a guest from a snapshot.
The guest memory contents are restored from
.Ar file ,
and the guest device and vCPU state are restored from the file
.Dq Ar file Ns .kern .
.Pp
Note that the current snapshot file format requires that the configuration of
devices in the new VM match the VM from which the snapshot was taken by specifying the
same
.Op Fl s
and
.Op Fl l
options.
The count of vCPUs and memory configuration are read from the snapshot.
.It Fl s Op Ar help|slot,emulation Ns Op , Ns Ar conf
Configure a virtual PCI slot and function.
.Pp

View file

@ -36,7 +36,14 @@ __FBSDID("$FreeBSD$");
#include <sys/capsicum.h>
#endif
#include <sys/mman.h>
#ifdef BHYVE_SNAPSHOT
#include <sys/socket.h>
#include <sys/stat.h>
#endif
#include <sys/time.h>
#ifdef BHYVE_SNAPSHOT
#include <sys/un.h>
#endif
#include <amd64/vmm/intel/vmcs.h>
@ -51,6 +58,9 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <err.h>
#include <errno.h>
#ifdef BHYVE_SNAPSHOT
#include <fcntl.h>
#endif
#include <libgen.h>
#include <unistd.h>
#include <assert.h>
@ -59,6 +69,12 @@ __FBSDID("$FreeBSD$");
#include <sysexits.h>
#include <stdbool.h>
#include <stdint.h>
#ifdef BHYVE_SNAPSHOT
#include <ucl.h>
#include <unistd.h>
#include <libxo/xo.h>
#endif
#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
@ -83,6 +99,9 @@ __FBSDID("$FreeBSD$");
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
#endif
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"
@ -163,7 +182,7 @@ static const char * const vmx_exit_reason_desc[] = {
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
char *vmname;
const char *vmname;
int guest_ncpus;
uint16_t cores, maxcpus, sockets, threads;
@ -229,6 +248,9 @@ usage(int code)
" -H: vmexit from the guest on hlt\n"
" -l: LPC device configuration\n"
" -m: memory size in MB\n"
#ifdef BHYVE_SNAPSHOT
" -r: path to checkpoint file\n"
#endif
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
@ -388,6 +410,14 @@ paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
return (vm_map_gpa(ctx, gaddr, len));
}
#ifdef BHYVE_SNAPSHOT
/*
 * Reverse-direction counterpart of paddr_guest2host(): pass the host
 * pointer 'addr' to vm_rev_map_gpa() and return whatever it yields.
 */
uintptr_t
paddr_host2guest(struct vmctx *ctx, void *addr)
{
	uintptr_t gaddr;

	gaddr = vm_rev_map_gpa(ctx, addr);

	return (gaddr);
}
#endif
int
fbsdrun_vmexit_on_pause(void)
{
@ -422,6 +452,9 @@ fbsdrun_start_thread(void *param)
snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
pthread_set_name_np(mtp->mt_thr, tname);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_add(vcpu);
#endif
if (gdb_port != 0)
gdb_cpu_add(vcpu);
@ -697,11 +730,15 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
stats.vmexit_mtrap++;
if (gdb_port == 0) {
fprintf(stderr, "vm_loop: unexpected VMEXIT_MTRAP\n");
exit(4);
}
gdb_cpu_mtrap(*pvcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_suspend(*pvcpu);
#endif
if (gdb_port != 0)
gdb_cpu_mtrap(*pvcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_resume(*pvcpu);
#endif
return (VMEXIT_CONTINUE);
}
@ -778,11 +815,14 @@ static int
vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
if (gdb_port == 0) {
fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
exit(4);
}
gdb_cpu_suspend(*pvcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_suspend(*pvcpu);
#endif
if (gdb_port != 0)
gdb_cpu_suspend(*pvcpu);
#ifdef BHYVE_SNAPSHOT
checkpoint_cpu_resume(*pvcpu);
#endif
return (VMEXIT_CONTINUE);
}
@ -997,6 +1037,22 @@ do_open(const char *vmname)
return (ctx);
}
/*
 * Start 'vcpu' running from the RIP currently held in its register state.
 *
 * The vCPU's capabilities are configured via fbsdrun_set_capabilities(),
 * VM_CAP_UNRESTRICTED_GUEST is switched on, and the vCPU is then handed to
 * fbsdrun_addcpu() with BSP as the originating CPU.  A failure to read RIP
 * or to set the capability trips an assertion.
 */
void
spinup_vcpu(struct vmctx *ctx, int vcpu)
{
	uint64_t start_rip;
	int rc;

	/* Fetch the instruction pointer the vCPU should resume at. */
	rc = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &start_rip);
	assert(rc == 0);

	fbsdrun_set_capabilities(ctx, vcpu);

	rc = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
	assert(rc == 0);

	fbsdrun_addcpu(ctx, BSP, vcpu, start_rip);
}
int
main(int argc, char *argv[])
{
@ -1008,6 +1064,13 @@ main(int argc, char *argv[])
uint64_t rip;
size_t memsize;
char *optstr;
#ifdef BHYVE_SNAPSHOT
char *restore_file;
struct restore_state rstate;
int vcpu;
restore_file = NULL;
#endif
bvmcons = 0;
progname = basename(argv[0]);
@ -1021,7 +1084,11 @@ main(int argc, char *argv[])
rtc_localtime = 1;
memflags = 0;
#ifdef BHYVE_SNAPSHOT
optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:r:";
#else
optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
case 'a':
@ -1067,6 +1134,11 @@ main(int argc, char *argv[])
"configuration '%s'", optarg);
}
break;
#ifdef BHYVE_SNAPSHOT
case 'r':
restore_file = optarg;
break;
#endif
case 's':
if (strncmp(optarg, "help", strlen(optarg)) == 0) {
pci_print_supported_devices();
@ -1128,12 +1200,50 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;
#ifdef BHYVE_SNAPSHOT
if (argc > 1 || (argc == 0 && restore_file == NULL))
usage(1);
if (restore_file != NULL) {
error = load_restore_file(restore_file, &rstate);
if (error) {
fprintf(stderr, "Failed to read checkpoint info from "
"file: '%s'.\n", restore_file);
exit(1);
}
}
if (argc == 1) {
vmname = argv[0];
} else {
vmname = lookup_vmname(&rstate);
if (vmname == NULL) {
fprintf(stderr, "Cannot find VM name in restore file. "
"Please specify one.\n");
exit(1);
}
}
#else
if (argc != 1)
usage(1);
vmname = argv[0];
#endif
ctx = do_open(vmname);
#ifdef BHYVE_SNAPSHOT
if (restore_file != NULL) {
guest_ncpus = lookup_guest_ncpus(&rstate);
memflags = lookup_memflags(&rstate);
memsize = lookup_memsize(&rstate);
}
if (guest_ncpus < 1) {
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
exit(1);
}
#endif
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
@ -1200,6 +1310,40 @@ main(int argc, char *argv[])
assert(error == 0);
}
#ifdef BHYVE_SNAPSHOT
if (restore_file != NULL) {
fprintf(stdout, "Pausing pci devs...\r\n");
if (vm_pause_user_devs(ctx) != 0) {
fprintf(stderr, "Failed to pause PCI device state.\n");
exit(1);
}
fprintf(stdout, "Restoring vm mem...\r\n");
if (restore_vm_mem(ctx, &rstate) != 0) {
fprintf(stderr, "Failed to restore VM memory.\n");
exit(1);
}
fprintf(stdout, "Restoring pci devs...\r\n");
if (vm_restore_user_devs(ctx, &rstate) != 0) {
fprintf(stderr, "Failed to restore PCI device state.\n");
exit(1);
}
fprintf(stdout, "Restoring kernel structs...\r\n");
if (vm_restore_kern_structs(ctx, &rstate) != 0) {
fprintf(stderr, "Failed to restore kernel structs.\n");
exit(1);
}
fprintf(stdout, "Resuming pci devs...\r\n");
if (vm_resume_user_devs(ctx) != 0) {
fprintf(stderr, "Failed to resume PCI device state.\n");
exit(1);
}
}
#endif
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
assert(error == 0);
@ -1240,11 +1384,41 @@ main(int argc, char *argv[])
errx(EX_OSERR, "cap_enter() failed");
#endif
#ifdef BHYVE_SNAPSHOT
if (restore_file != NULL)
destroy_restore_state(&rstate);
/*
* checkpointing thread for communication with bhyvectl
*/
if (init_checkpoint_thread(ctx) < 0)
printf("Failed to start checkpoint thread!\r\n");
if (restore_file != NULL)
vm_restore_time(ctx);
#endif
/*
* Add CPU 0
*/
fbsdrun_addcpu(ctx, BSP, BSP, rip);
#ifdef BHYVE_SNAPSHOT
/*
* If we restore a VM, start all vCPUs now (including APs); otherwise,
* let the guest OS spin them up later via vmexits.
*/
if (restore_file != NULL) {
for (vcpu = 0; vcpu < guest_ncpus; vcpu++) {
if (vcpu == BSP)
continue;
fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu);
spinup_vcpu(ctx, vcpu);
}
}
#endif
/*
* Head off to the main event dispatch loop
*/

View file

@ -38,9 +38,12 @@ struct vmctx;
extern int guest_ncpus;
extern uint16_t cores, sockets, threads;
extern char *guest_uuid_str;
extern char *vmname;
extern const char *vmname;
void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
#ifdef BHYVE_SNAPSHOT
uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr);
#endif
void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);

View file

@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <unistd.h>
#include <machine/atomic.h>
#include <machine/vmm_snapshot.h>
#include "bhyverun.h"
#include "debug.h"
@ -105,9 +106,13 @@ struct blockif_ctxt {
int bc_psectsz;
int bc_psectoff;
int bc_closing;
int bc_paused;
int bc_work_count;
pthread_t bc_btid[BLOCKIF_NUMTHR];
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
pthread_cond_t bc_paused_cond;
pthread_cond_t bc_work_done_cond;
/* Request elements and free/pending/busy queues */
TAILQ_HEAD(, blockif_elem) bc_freeq;
@ -210,6 +215,18 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}
/*
 * Flush the backing store of 'bc': a DIOCGFLUSH ioctl when bc_ischr is set,
 * fsync() otherwise.  Returns 0 on success or the errno left by the failing
 * call.
 */
static int
blockif_flush_bc(struct blockif_ctxt *bc)
{
	int rc;

	if (bc->bc_ischr)
		rc = ioctl(bc->bc_fd, DIOCGFLUSH);
	else
		rc = fsync(bc->bc_fd);

	return (rc != 0 ? errno : 0);
}
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
@ -300,11 +317,7 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
}
break;
case BOP_FLUSH:
if (bc->bc_ischr) {
if (ioctl(bc->bc_fd, DIOCGFLUSH))
err = errno;
} else if (fsync(bc->bc_fd))
err = errno;
err = blockif_flush_bc(bc);
break;
case BOP_DELETE:
if (!bc->bc_candelete)
@ -348,15 +361,30 @@ blockif_thr(void *arg)
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
while (blockif_dequeue(bc, t, &be)) {
bc->bc_work_count++;
/* We cannot process work if the interface is paused */
while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
bc->bc_work_count--;
/* If none of the workers are busy, notify the main thread */
if (bc->bc_work_count == 0)
pthread_cond_broadcast(&bc->bc_work_done_cond);
/* Check ctxt status here to see if exit requested */
if (bc->bc_closing)
break;
/* Make all worker threads wait here if the device is paused */
while (bc->bc_paused)
pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
}
pthread_mutex_unlock(&bc->bc_mtx);
@ -565,6 +593,10 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_psectoff = psectoff;
pthread_mutex_init(&bc->bc_mtx, NULL);
pthread_cond_init(&bc->bc_cond, NULL);
bc->bc_paused = 0;
bc->bc_work_count = 0;
pthread_cond_init(&bc->bc_paused_cond, NULL);
pthread_cond_init(&bc->bc_work_done_cond, NULL);
TAILQ_INIT(&bc->bc_freeq);
TAILQ_INIT(&bc->bc_pendq);
TAILQ_INIT(&bc->bc_busyq);
@ -657,6 +689,8 @@ blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
assert(bc->bc_magic == BLOCKIF_SIG);
pthread_mutex_lock(&bc->bc_mtx);
/* XXX: not waiting while paused */
/*
* Check pending requests.
*/
@ -855,3 +889,100 @@ blockif_candelete(struct blockif_ctxt *bc)
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Pause the block interface 'bc'.
 *
 * Marks the context as paused under bc_mtx and then sleeps on
 * bc_work_done_cond until bc_work_count has dropped to zero.  Once the lock
 * is released the backing store is flushed; a flush failure is only warned
 * about on stderr.
 */
void
blockif_pause(struct blockif_ctxt *bc)
{
	int flush_error;

	assert(bc != NULL);
	assert(bc->bc_magic == BLOCKIF_SIG);

	pthread_mutex_lock(&bc->bc_mtx);
	bc->bc_paused = 1;

	/* Let the in-flight work drain before declaring the pause done. */
	while (bc->bc_work_count != 0)
		pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
	pthread_mutex_unlock(&bc->bc_mtx);

	flush_error = blockif_flush_bc(bc);
	if (flush_error != 0) {
		fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n",
		    __func__);
	}
}
/*
 * Undo blockif_pause(): clear the paused flag under bc_mtx and wake every
 * thread sleeping on either bc_paused_cond or bc_cond.
 */
void
blockif_resume(struct blockif_ctxt *bc)
{
	assert(bc != NULL);
	assert(bc->bc_magic == BLOCKIF_SIG);

	pthread_mutex_lock(&bc->bc_mtx);

	bc->bc_paused = 0;

	/* First release the threads parked because of the pause ... */
	pthread_cond_broadcast(&bc->bc_paused_cond);
	/* ... then nudge the ones waiting for work. */
	pthread_cond_broadcast(&bc->bc_cond);

	pthread_mutex_unlock(&bc->bc_mtx);
}
int
blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
{
int i;
struct iovec *iov;
int ret;
SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
/*
* XXX: The callback and parameter must be filled by the virtualized
* device that uses the interface, during its init; we're not touching
* them here.
*/
/* Snapshot the iovecs. */
for (i = 0; i < br->br_iovcnt; i++) {
iov = &br->br_iov[i];
SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
/* We assume the iov is a guest-mapped address. */
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
false, meta, ret, done);
}
done:
return (ret);
}
/*
 * Save or restore the scalar fields of block interface context 'bc'
 * through 'meta'.  The interface must already be paused.
 *
 * The paused check is made while holding bc_mtx, the same lock under which
 * blockif_pause()/blockif_resume() modify bc_paused, so the flag is never
 * read concurrently with a writer.
 *
 * Returns 0 on success, ENXIO if the interface is not paused, or the error
 * from the failing snapshot step.
 */
int
blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
{
	int ret;

	pthread_mutex_lock(&bc->bc_mtx);

	if (bc->bc_paused == 0) {
		fprintf(stderr, "%s: Snapshot failed: "
		    "interface not paused.\r\n", __func__);
		ret = ENXIO;
		goto done;
	}

	SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);

done:
	pthread_mutex_unlock(&bc->bc_mtx);
	return (ret);
}
#endif

View file

@ -41,6 +41,9 @@
#include <sys/uio.h>
#include <sys/unistd.h>
struct vm_snapshot_meta;
/*
* BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in
* a single request. BLOCKIF_RING_MAX is the maximum number of
@ -74,5 +77,13 @@ int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_close(struct blockif_ctxt *bc);
#ifdef BHYVE_SNAPSHOT
void blockif_pause(struct blockif_ctxt *bc);
void blockif_resume(struct blockif_ctxt *bc);
int blockif_snapshot_req(struct blockif_req *br,
struct vm_snapshot_meta *meta);
int blockif_snapshot(struct blockif_ctxt *bc,
struct vm_snapshot_meta *meta);
#endif
#endif /* _BLOCK_IF_H_ */

View file

@ -63,7 +63,7 @@ __FBSDID("$FreeBSD$");
#define MEVENT_MAX 64
extern char *vmname;
extern const char *vmname;
static pthread_t mevent_tid;
static int mevent_timid = 43;

View file

@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$");
#include <sys/ata.h>
#include <sys/endian.h>
#include <machine/vmm_snapshot.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@ -131,6 +133,7 @@ struct ahci_ioreq {
uint32_t done;
int slot;
int more;
int readop;
};
struct ahci_port {
@ -724,6 +727,7 @@ ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
aior->slot = slot;
aior->len = len;
aior->done = done;
aior->readop = readop;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@ -1420,6 +1424,7 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
aior->slot = slot;
aior->len = len;
aior->done = done;
aior->readop = 1;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@ -2446,6 +2451,282 @@ pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (pci_ahci_init(ctx, pi, opts, 1));
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save the free and busy request queues of 'port'.
 *
 * Each queue is written as the list of indices of its entries within
 * port->ioreq[], followed by a -1 terminator.  For busy entries the index
 * is immediately followed by the blockif request state.
 *
 * Returns 0 on success or the error from the failing snapshot step.
 */
static int
pci_ahci_snapshot_save_queues(struct ahci_port *port,
    struct vm_snapshot_meta *meta)
{
	int ret;
	int idx;
	struct ahci_ioreq *ioreq;

	/* Free queue. */
	STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
		/* Element index via typed pointer subtraction. */
		idx = (int)(ioreq - port->ioreq);
		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
	}

	idx = -1;
	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);

	/* Busy queue. */
	TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
		idx = (int)(ioreq - port->ioreq);
		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);

		/*
		 * Snapshot only the busy requests; other requests are
		 * not valid.
		 */
		ret = blockif_snapshot_req(&ioreq->io_req, meta);
		if (ret != 0) {
			fprintf(stderr, "%s: failed to snapshot req\r\n",
			    __func__);
			goto done;
		}
	}

	idx = -1;
	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);

done:
	return (ret);
}
/*
 * Rebuild the free and busy request queues of 'port' from a snapshot.
 *
 * The stream holds, for each queue, a list of indices into port->ioreq[]
 * terminated by -1.  Each busy index is followed by the blockif request
 * state; after restoring it the request is re-submitted to the block layer
 * as a read or a write according to ioreq->readop.
 *
 * Indices come from the snapshot file, so every one is validated against
 * port->ioqsz before it is used.
 *
 * Returns 0 on success, EINVAL for an out-of-range index, or the error from
 * the failing snapshot/enqueue step.
 */
static int
pci_ahci_snapshot_restore_queues(struct ahci_port *port,
    struct vm_snapshot_meta *meta)
{
	int ret;
	int idx;
	struct ahci_ioreq *ioreq;

	/* Empty the free queue before restoring. */
	while (!STAILQ_EMPTY(&port->iofhd))
		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);

	/* Restore the free queue. */
	while (1) {
		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
		if (idx == -1)
			break;

		if (idx < 0 || idx >= port->ioqsz) {
			fprintf(stderr, "%s: invalid free request index %d\r\n",
			    __func__, idx);
			ret = EINVAL;
			goto done;
		}

		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
	}

	/* Restore the busy queue. */
	while (1) {
		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
		if (idx == -1)
			break;

		if (idx < 0 || idx >= port->ioqsz) {
			fprintf(stderr, "%s: invalid busy request index %d\r\n",
			    __func__, idx);
			ret = EINVAL;
			goto done;
		}

		ioreq = &port->ioreq[idx];
		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);

		/*
		 * Restore only the busy requests; other requests are
		 * not valid.
		 */
		ret = blockif_snapshot_req(&ioreq->io_req, meta);
		if (ret != 0) {
			fprintf(stderr, "%s: failed to restore request\r\n",
			    __func__);
			goto done;
		}

		/* Re-enqueue the requests in the block interface. */
		if (ioreq->readop)
			ret = blockif_read(port->bctx, &ioreq->io_req);
		else
			ret = blockif_write(port->bctx, &ioreq->io_req);

		if (ret != 0) {
			fprintf(stderr,
			    "%s: failed to re-enqueue request\r\n",
			    __func__);
			goto done;
		}
	}

done:
	return (ret);
}
/*
 * Save or restore the state of an AHCI controller through 'meta'.
 *
 * The device instance is taken from meta->dev_data and the controller
 * softc from its pi_arg.  Controller-wide registers are handled first, then
 * every one of the MAX_PORTS ports: per-port registers, the per-slot ioreq
 * bookkeeping, the free/busy request queues and finally the block interface
 * context.
 *
 * The SNAPSHOT_*_OR_LEAVE macros are defined outside this file; presumably
 * each one transfers the named object in the direction of meta->op, stores
 * the status in 'ret' and jumps to 'done' on failure -- confirm against
 * <machine/vmm_snapshot.h>.  The order of the calls presumably fixes the
 * snapshot layout, so do not reorder them.
 *
 * Returns 0 on success, EINVAL when the port layout in the snapshot does
 * not match the configured device or meta->op is unknown, or the error from
 * the failing step.
 */
static int
pci_ahci_snapshot(struct vm_snapshot_meta *meta)
{
int i, j, ret;
void *bctx;
struct pci_devinst *pi;
struct pci_ahci_softc *sc;
struct ahci_port *port;
struct ahci_cmd_hdr *hdr;
struct ahci_ioreq *ioreq;
pi = meta->dev_data;
sc = pi->pi_arg;
/* TODO: add mtx lock/unlock */
/* Controller-wide (HBA) state. */
SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
for (i = 0; i < MAX_PORTS; i++) {
port = &sc->port[i];
/*
 * The block context pointer goes through a local: on save it is loaded
 * from the port first; on restore 'bctx' is presumably filled from the
 * snapshot and is only used below for its NULL-ness, never dereferenced.
 */
if (meta->op == VM_SNAPSHOT_SAVE)
bctx = port->bctx;
SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
/* Mostly for restore; save is ensured by the lines above. */
if (((bctx == NULL) && (port->bctx != NULL)) ||
((bctx != NULL) && (port->bctx == NULL))) {
fprintf(stderr, "%s: ports not matching\r\n", __func__);
ret = EINVAL;
goto done;
}
/* Ports without a backing block device carry no further state. */
if (port->bctx == NULL)
continue;
if (port->port != i) {
fprintf(stderr, "%s: ports not matching: "
"actual: %d expected: %d\r\n",
__func__, port->port, i);
ret = EINVAL;
goto done;
}
/* Pointers into guest memory: command list and received-FIS area. */
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
ret, done);
/* Per-port device and register state. */
SNAPSHOT_VAR_OR_LEAVE(port->ident, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
for (j = 0; j < port->ioqsz; j++) {
ioreq = &port->ioreq[j];
/* blockif_req snapshot done only for busy requests. */
/*
 * NOTE(review): 'hdr' is derived from ioreq->slot before that field is
 * itself handled a few lines below; on restore this appears to use the
 * pre-restore slot value when sizing the cfis region -- confirm.
 */
hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
ioreq->slot * AHCI_CL_SIZE);
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
false, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
}
/* Perform save / restore specific operations. */
if (meta->op == VM_SNAPSHOT_SAVE) {
ret = pci_ahci_snapshot_save_queues(port, meta);
if (ret != 0)
goto done;
} else if (meta->op == VM_SNAPSHOT_RESTORE) {
ret = pci_ahci_snapshot_restore_queues(port, meta);
if (ret != 0)
goto done;
} else {
ret = EINVAL;
goto done;
}
/*
 * NOTE(review): this runs on both the save and the restore path, yet the
 * message below always says "restore".
 */
ret = blockif_snapshot(port->bctx, meta);
if (ret != 0) {
fprintf(stderr, "%s: failed to restore blockif\r\n",
__func__);
goto done;
}
}
done:
return (ret);
}
/*
 * Pause callback for the AHCI device model: call blockif_pause() on the
 * block interface of every port that has one.  'ctx' is not used.
 * Always returns 0.
 */
static int
pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
{
	struct pci_ahci_softc *sc;
	struct blockif_ctxt *blk;
	int portno;

	sc = pi->pi_arg;

	for (portno = 0; portno < MAX_PORTS; portno++) {
		blk = sc->port[portno].bctx;
		if (blk != NULL)
			blockif_pause(blk);
	}

	return (0);
}
/*
 * Resume callback for the AHCI device model: call blockif_resume() on the
 * block interface of every port that has one.  'ctx' is not used.
 * Always returns 0.
 */
static int
pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
{
	struct pci_ahci_softc *sc;
	struct blockif_ctxt *blk;
	int portno;

	sc = pi->pi_arg;

	for (portno = 0; portno < MAX_PORTS; portno++) {
		blk = sc->port[portno].bctx;
		if (blk != NULL)
			blockif_resume(blk);
	}

	return (0);
}
#endif
/*
* Use separate emulation names to distinguish drive and atapi devices
*/
@ -2453,7 +2734,12 @@ struct pci_devemu pci_de_ahci = {
.pe_emu = "ahci",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
.pe_barread = pci_ahci_read
.pe_barread = pci_ahci_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_ahci_snapshot,
.pe_pause = pci_ahci_pause,
.pe_resume = pci_ahci_resume,
#endif
};
PCI_EMUL_SET(pci_de_ahci);
@ -2461,7 +2747,12 @@ struct pci_devemu pci_de_ahci_hd = {
.pe_emu = "ahci-hd",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
.pe_barread = pci_ahci_read
.pe_barread = pci_ahci_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_ahci_snapshot,
.pe_pause = pci_ahci_pause,
.pe_resume = pci_ahci_resume,
#endif
};
PCI_EMUL_SET(pci_de_ahci_hd);
@ -2469,6 +2760,11 @@ struct pci_devemu pci_de_ahci_cd = {
.pe_emu = "ahci-cd",
.pe_init = pci_ahci_atapi_init,
.pe_barwrite = pci_ahci_write,
.pe_barread = pci_ahci_read
.pe_barread = pci_ahci_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_ahci_snapshot,
.pe_pause = pci_ahci_pause,
.pe_resume = pci_ahci_resume,
#endif
};
PCI_EMUL_SET(pci_de_ahci_cd);

View file

@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$");
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <machine/vmm_snapshot.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@ -2378,11 +2380,168 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (0);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the e82545 device model state.
 *
 * meta->dev_data carries the pci_devinst of the device; its pi_arg is the
 * e82545 softc.  Every field is passed through the SNAPSHOT_* macros in a
 * fixed order, so the same routine serves both VM_SNAPSHOT_SAVE and
 * VM_SNAPSHOT_RESTORE.  The order of the records defines the on-disk layout
 * and must not be changed.
 *
 * Returns 0 on success, otherwise the error stored in 'ret' by the first
 * SNAPSHOT_* macro that failed.
 */
static int
e82545_snapshot(struct vm_snapshot_meta *meta)
{
	int i;
	int ret;
	struct e82545_softc *sc;
	struct pci_devinst *pi;
	uint64_t bitmap_value = 0;

	pi = meta->dev_data;
	sc = pi->pi_arg;

	/* esc_mevp and esc_mevpitr should be reinitiated at init. */
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_mac, meta, ret, done);

	/* General */
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_CTRL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAH, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCT, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_VET, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCTTV, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_LEDCTL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_PBA, meta, ret, done);

	/* Interrupt control */
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_irq_asserted, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICR, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ITR, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICS, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMS, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMC, meta, ret, done);

	/*
	 * Transmit
	 *
	 * The fields in the unions are in superposition to access certain
	 * bytes in the larger uint variables.
	 * e.g., ip_config = [ipcss|ipcso|ipcse0|ipcse1]
	 */
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.lower_setup.ip_config, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.upper_setup.tcp_config, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.cmd_and_length, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.tcp_seg_setup.data, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_active, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXCW, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TCTL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIPG, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_AIT, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tdba, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAH, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDLEN, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDH, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDHr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDT, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIDV, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXDCTL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TADV, meta, ret, done);

	/* Has dependency on esc_TDLEN; reorder of fields from struct. */
	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_txdesc, sc->esc_TDLEN,
	    true, meta, ret, done);

	/* L2 frame acceptance */
	for (i = 0; i < nitems(sc->esc_uni); i++) {
		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_valid, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_addrsel, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_eth, meta, ret, done);
	}

	SNAPSHOT_BUF_OR_LEAVE(sc->esc_fmcast, sizeof(sc->esc_fmcast),
	    meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(sc->esc_fvlan, sizeof(sc->esc_fvlan),
	    meta, ret, done);

	/* Receive */
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_active, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_loopback, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RCTL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTH, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rdba, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAH, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDLEN, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDH, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDT, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDTR, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXDCTL, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RADV, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RSRPD, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXCSUM, meta, ret, done);

	/*
	 * Has dependency on esc_RDLEN; reorder of fields from struct.
	 * The receive ring must be sized by the receive descriptor length,
	 * not by the transmit one.
	 */
	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_rxdesc, sc->esc_RDLEN,
	    true, meta, ret, done);

	/* IO Port register access */
	SNAPSHOT_VAR_OR_LEAVE(sc->io_addr, meta, ret, done);

	/* Shadow copy of MDIC */
	SNAPSHOT_VAR_OR_LEAVE(sc->mdi_control, meta, ret, done);

	/* Shadow copy of EECD */
	SNAPSHOT_VAR_OR_LEAVE(sc->eeprom_control, meta, ret, done);

	/* Latest NVM in/out */
	SNAPSHOT_VAR_OR_LEAVE(sc->nvm_data, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->nvm_opaddr, meta, ret, done);

	/* Stats */
	SNAPSHOT_VAR_OR_LEAVE(sc->missed_pkt_count, meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(sc->pkt_rx_by_size, sizeof(sc->pkt_rx_by_size),
	    meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(sc->pkt_tx_by_size, sizeof(sc->pkt_tx_by_size),
	    meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_rx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_rx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_rx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_tx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_tx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_tx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->oversize_rx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->tso_tx_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_rx, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_tx, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->missed_octets, meta, ret, done);

	/*
	 * The NVM mode/bits fields are copied through a temporary instead of
	 * being snapshotted in place (presumably because they are bit-fields
	 * whose address cannot be taken).
	 *
	 * NOTE(review): the first record used to save nvm_bits a second
	 * time, so the sibling field was never saved or restored.  nvm_mode
	 * is assumed to be that sibling in struct e82545_softc -- confirm
	 * against the struct definition.  The record size is unchanged.
	 */
	if (meta->op == VM_SNAPSHOT_SAVE)
		bitmap_value = sc->nvm_mode;
	SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
	if (meta->op == VM_SNAPSHOT_RESTORE)
		sc->nvm_mode = bitmap_value;

	if (meta->op == VM_SNAPSHOT_SAVE)
		bitmap_value = sc->nvm_bits;
	SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
	if (meta->op == VM_SNAPSHOT_RESTORE)
		sc->nvm_bits = bitmap_value;

	/* EEPROM data */
	SNAPSHOT_BUF_OR_LEAVE(sc->eeprom_data, sizeof(sc->eeprom_data),
	    meta, ret, done);

done:
	return (ret);
}
#endif
struct pci_devemu pci_de_e82545 = {
.pe_emu = "e1000",
.pe_init = e82545_init,
.pe_barwrite = e82545_write,
.pe_barread = e82545_read
.pe_barread = e82545_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = e82545_snapshot,
#endif
};
PCI_EMUL_SET(pci_de_e82545);

View file

@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <stdbool.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include "acpi.h"
@ -1962,6 +1963,191 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
#ifdef BHYVE_SNAPSHOT
/*
* Saves/restores PCI device emulated state. Returns 0 on success.
*/
static int
pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
{
struct pci_devinst *pi;
int i;
int ret;
pi = meta->dev_data;
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
meta, ret, done);
for (i = 0; i < nitems(pi->pi_bar); i++) {
SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
}
/* Restore MSI-X table. */
for (i = 0; i < pi->pi_msix.table_count; i++) {
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
meta, ret, done);
}
done:
return (ret);
}
/*
 * Look up a configured PCI function by its emulation name.
 *
 * Walks every bus/slot/function and stops at the first function whose
 * fi_name equals dev_name.  On a match, *pde receives the device emulation
 * returned by pci_emul_finddev() and *pdi the device instance, and 0 is
 * returned.  EINVAL is returned when no function carries that name.
 */
static int
pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
    struct pci_devinst **pdi)
{
	struct businfo *businfo;
	struct funcinfo *funcinfo;
	int b, s, f;

	assert(dev_name != NULL);
	assert(pde != NULL);
	assert(pdi != NULL);

	for (b = 0; b < MAXBUSES; b++) {
		businfo = pci_businfo[b];
		if (businfo == NULL)
			continue;

		for (s = 0; s < MAXSLOTS; s++) {
			for (f = 0; f < MAXFUNCS; f++) {
				funcinfo = &businfo->slotinfo[s].si_funcs[f];

				if (funcinfo->fi_name == NULL ||
				    strcmp(dev_name, funcinfo->fi_name) != 0)
					continue;

				*pde = pci_emul_finddev(funcinfo->fi_name);
				assert(*pde != NULL);

				*pdi = funcinfo->fi_devi;
				return (0);
			}
		}
	}

	return (EINVAL);
}
/*
 * Save or restore one PCI device, identified by meta->dev_name.
 *
 * An unknown device name is not treated as an error: the snapshot buffer
 * is zeroed and 0 is returned.  A device whose emulation has no
 * pe_snapshot callback, or whose generic PCI state cannot be processed,
 * yields -1.  Otherwise the result of the device's own pe_snapshot callback
 * is returned.
 */
int
pci_snapshot(struct vm_snapshot_meta *meta)
{
	struct pci_devemu *emu;
	struct pci_devinst *inst;

	assert(meta->dev_name != NULL);

	if (pci_find_slotted_dev(meta->dev_name, &emu, &inst) != 0) {
		fprintf(stderr, "%s: no such name: %s\r\n",
		    __func__, meta->dev_name);
		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
		return (0);
	}

	/* The per-device callbacks locate their softc through dev_data. */
	meta->dev_data = inst;

	if (emu->pe_snapshot == NULL) {
		fprintf(stderr, "%s: not implemented yet for: %s\r\n",
		    __func__, meta->dev_name);
		return (-1);
	}

	/* Generic PCI state first, then the device-specific state. */
	if (pci_snapshot_pci_dev(meta) != 0) {
		fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
		    __func__);
		return (-1);
	}

	return ((*emu->pe_snapshot)(meta));
}
/*
 * Pause the PCI device called dev_name, if it exists and supports pausing.
 *
 * A missing device or a missing pe_pause callback is only reported on
 * stderr and counts as success (0).  Otherwise the callback's return value
 * is passed through.
 */
int
pci_pause(struct vmctx *ctx, const char *dev_name)
{
	struct pci_devemu *emu;
	struct pci_devinst *inst;

	assert(dev_name != NULL);

	/*
	 * Callers are allowed to invoke this without first checking that
	 * the device is actually inserted.
	 */
	if (pci_find_slotted_dev(dev_name, &emu, &inst) != 0) {
		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
		return (0);
	}

	/* Pause/resume support is optional for a device model. */
	if (emu->pe_pause == NULL) {
		fprintf(stderr, "%s: not implemented for: %s\n",
		    __func__, dev_name);
		return (0);
	}

	return ((*emu->pe_pause)(ctx, inst));
}
/*
 * Resume the PCI device called dev_name, if it exists and supports resuming.
 *
 * A missing device or a missing pe_resume callback is only reported on
 * stderr and counts as success (0).  Otherwise the callback's return value
 * is passed through.
 */
int
pci_resume(struct vmctx *ctx, const char *dev_name)
{
	struct pci_devemu *emu;
	struct pci_devinst *inst;

	assert(dev_name != NULL);

	/*
	 * Callers are allowed to invoke this without first checking that
	 * the device is actually inserted.
	 */
	if (pci_find_slotted_dev(dev_name, &emu, &inst) != 0) {
		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
		return (0);
	}

	/* Pause/resume support is optional for a device model. */
	if (emu->pe_resume == NULL) {
		fprintf(stderr, "%s: not implemented for: %s\n",
		    __func__, dev_name);
		return (0);
	}

	return ((*emu->pe_resume)(ctx, inst));
}
#endif
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
@ -1970,7 +2156,7 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
#define DIOSZ 8
#define DMEMSZ 4096
struct pci_emul_dsoftc {
uint8_t ioregs[DIOSZ];
uint8_t ioregs[DIOSZ];
uint8_t memregs[2][DMEMSZ];
};
@ -2062,7 +2248,7 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
} else {
printf("diow: memw unknown size %d\n", size);
}
/*
* magic interrupt ??
*/
@ -2087,7 +2273,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
offset, size);
return (0);
}
value = 0;
if (size == 1) {
value = sc->ioregs[offset];
@ -2106,7 +2292,7 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
offset, size);
return (0);
}
i = baridx - 1; /* 'memregs' index */
if (size == 1) {
@ -2131,11 +2317,23 @@ pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
return (value);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Snapshot callback of the "dummy" test device: nothing is saved or
 * restored, success is reported unconditionally.
 */
int
pci_emul_snapshot(struct vm_snapshot_meta *meta)
{
	(void)meta;

	return (0);
}
#endif
struct pci_devemu pci_dummy = {
.pe_emu = "dummy",
.pe_init = pci_emul_dinit,
.pe_barwrite = pci_emul_diow,
.pe_barread = pci_emul_dior
.pe_barread = pci_emul_dior,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_emul_snapshot,
#endif
};
PCI_EMUL_SET(pci_dummy);

View file

@ -45,6 +45,7 @@
struct vmctx;
struct pci_devinst;
struct memory_region;
struct vm_snapshot_meta;
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
@ -71,6 +72,11 @@ struct pci_devemu {
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
/* Save/restore device state */
int (*pe_snapshot)(struct vm_snapshot_meta *meta);
int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi);
int (*pe_resume)(struct vmctx *ctx, struct pci_devinst *pi);
};
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
@ -246,6 +252,11 @@ void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg);
void pci_write_dsdt(void);
uint64_t pci_ecfg_base(void);
int pci_bus_configured(int bus);
#ifdef BHYVE_SNAPSHOT
int pci_snapshot(struct vm_snapshot_meta *meta);
int pci_pause(struct vmctx *ctx, const char *dev_name);
int pci_resume(struct vmctx *ctx, const char *dev_name);
#endif
static __inline void
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)

View file

@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mman.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include <stdio.h>
@ -440,10 +441,26 @@ pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (error);
}
#ifdef BHYVE_SNAPSHOT
static int
pci_fbuf_snapshot(struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_BUF_OR_LEAVE(fbuf_sc->fb_base, FB_SIZE, meta, ret, err);
err:
return (ret);
}
#endif
struct pci_devemu pci_fbuf = {
.pe_emu = "fbuf",
.pe_init = pci_fbuf_init,
.pe_barwrite = pci_fbuf_write,
.pe_barread = pci_fbuf_read
.pe_barread = pci_fbuf_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_fbuf_snapshot,
#endif
};
PCI_EMUL_SET(pci_fbuf);

View file

@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdlib.h>
@ -452,12 +453,35 @@ lpc_pirq_routed(void)
pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5));
}
#ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the state of every UART that hangs off the LPC bridge.
 *
 * Stops at the first UART whose uart_snapshot() call fails and returns that
 * error; returns 0 when all units were processed.  'ret' starts at 0 so the
 * function never returns an indeterminate value, even if the loop body is
 * not entered.
 */
static int
pci_lpc_snapshot(struct vm_snapshot_meta *meta)
{
	int unit, ret;
	struct uart_softc *sc;

	ret = 0;
	for (unit = 0; unit < LPC_UART_NUM; unit++) {
		sc = lpc_uart_softc[unit].uart_softc;

		ret = uart_snapshot(sc, meta);
		if (ret != 0)
			break;
	}

	return (ret);
}
#endif
struct pci_devemu pci_de_lpc = {
.pe_emu = "lpc",
.pe_init = pci_lpc_init,
.pe_write_dsdt = pci_lpc_write_dsdt,
.pe_cfgwrite = pci_lpc_cfgwrite,
.pe_barwrite = pci_lpc_write,
.pe_barread = pci_lpc_read
.pe_barread = pci_lpc_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_lpc_snapshot,
#endif
};
PCI_EMUL_SET(pci_de_lpc);

View file

@ -39,6 +39,8 @@ __FBSDID("$FreeBSD$");
#include <sys/ioctl.h>
#include <sys/disk.h>
#include <machine/vmm_snapshot.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@ -198,6 +200,11 @@ static void pci_vtblk_reset(void *);
static void pci_vtblk_notify(void *, struct vqueue_info *);
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
#ifdef BHYVE_SNAPSHOT
static void pci_vtblk_pause(void *);
static void pci_vtblk_resume(void *);
static int pci_vtblk_snapshot(void *, struct vm_snapshot_meta *);
#endif
static struct virtio_consts vtblk_vi_consts = {
"vtblk", /* our name */
@ -209,6 +216,11 @@ static struct virtio_consts vtblk_vi_consts = {
pci_vtblk_cfgwrite, /* write PCI config */
NULL, /* apply negotiated features */
VTBLK_S_HOSTCAPS, /* our capabilities */
#ifdef BHYVE_SNAPSHOT
pci_vtblk_pause, /* pause blockif threads */
pci_vtblk_resume, /* resume blockif threads */
pci_vtblk_snapshot, /* save / restore device state */
#endif
};
static void
@ -241,6 +253,40 @@ pci_vtblk_done_locked(struct pci_vtblk_ioreq *io, int err)
vq_endchains(&sc->vbsc_vq, 0);
}
#ifdef BHYVE_SNAPSHOT
/*
 * virtio-blk pause hook: pauses the block backend attached to this device.
 */
static void
pci_vtblk_pause(void *vsc)
{
	struct pci_vtblk_softc *vtblk;

	vtblk = vsc;

	DPRINTF(("vtblk: device pause requested !\n"));
	blockif_pause(vtblk->bc);
}
/*
 * virtio-blk resume hook: resumes the block backend attached to this device.
 */
static void
pci_vtblk_resume(void *vsc)
{
	struct pci_vtblk_softc *vtblk;

	vtblk = vsc;

	DPRINTF(("vtblk: device resume requested !\n"));
	blockif_resume(vtblk->bc);
}
static int
pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta)
{
int ret;
struct pci_vtblk_softc *sc = vsc;
SNAPSHOT_VAR_OR_LEAVE(sc->vbsc_cfg, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(sc->vbsc_ident, sizeof(sc->vbsc_ident),
meta, ret, done);
done:
return (ret);
}
#endif
static void
pci_vtblk_done(struct blockif_req *br, int err)
{
@ -523,6 +569,9 @@ struct pci_devemu pci_de_vblk = {
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
.pe_barwrite = vi_pci_write,
.pe_barread = vi_pci_read
.pe_barread = vi_pci_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = vi_pci_snapshot,
#endif
};
PCI_EMUL_SET(pci_de_vblk);

View file

@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <machine/vmm_snapshot.h>
#include <net/ethernet.h>
#include <net/if.h> /* IFNAMSIZ */
@ -134,6 +135,11 @@ static void pci_vtnet_reset(void *);
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
#ifdef BHYVE_SNAPSHOT
static void pci_vtnet_pause(void *);
static void pci_vtnet_resume(void *);
static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *);
#endif
static struct virtio_consts vtnet_vi_consts = {
"vtnet", /* our name */
@ -145,6 +151,11 @@ static struct virtio_consts vtnet_vi_consts = {
pci_vtnet_cfgwrite, /* write PCI config */
pci_vtnet_neg_features, /* apply negotiated features */
VTNET_S_HOSTCAPS, /* our capabilities */
#ifdef BHYVE_SNAPSHOT
pci_vtnet_pause, /* pause rx/tx threads */
pci_vtnet_resume, /* resume rx/tx threads */
pci_vtnet_snapshot, /* save / restore device state */
#endif
};
static void
@ -740,10 +751,80 @@ pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);
}
#ifdef BHYVE_SNAPSHOT
/*
 * virtio-net pause hook.
 *
 * Takes rx_mtx and then tx_mtx, polling every 10 ms (with tx_mtx dropped
 * while sleeping) until tx_in_progress is clear.  Both mutexes are still
 * held when this function returns; pci_vtnet_resume() releases them.
 */
static void
pci_vtnet_pause(void *vsc)
{
	struct pci_vtnet_softc *vtnet = vsc;

	DPRINTF(("vtnet: device pause requested !\n"));

	/* Holding the RX lock keeps RX processing blocked. */
	pthread_mutex_lock(&vtnet->rx_mtx);

	/* Wait until the transmit thread has finished its current work. */
	for (;;) {
		pthread_mutex_lock(&vtnet->tx_mtx);
		if (!vtnet->tx_in_progress)
			break;
		pthread_mutex_unlock(&vtnet->tx_mtx);
		usleep(10000);
	}
}
/*
 * virtio-net resume hook: releases tx_mtx and then rx_mtx, the two locks
 * that pci_vtnet_pause() leaves held.
 */
static void
pci_vtnet_resume(void *vsc)
{
	struct pci_vtnet_softc *vtnet;

	vtnet = vsc;

	DPRINTF(("vtnet: device resume requested !\n"));

	pthread_mutex_unlock(&vtnet->tx_mtx);
	/* The RX lock was taken by the pause hook. */
	pthread_mutex_unlock(&vtnet->rx_mtx);
}
static int
pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta)
{
int ret;
struct pci_vtnet_softc *sc = vsc;
DPRINTF(("vtnet: device snapshot requested !\n"));
/*
* Queues and consts should have been saved by the more generic
* vi_pci_snapshot function. We need to save only our features and
* config.
*/
SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done);
/* Force reapply negociated features at restore time */
if (meta->op == VM_SNAPSHOT_RESTORE) {
pci_vtnet_neg_features(sc, sc->vsc_features);
netbe_rx_enable(sc->vsc_be);
}
SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->vhdrlen, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->be_vhdrlen, meta, ret, done);
done:
return (ret);
}
#endif
static struct pci_devemu pci_de_vnet = {
.pe_emu = "virtio-net",
.pe_init = pci_vtnet_init,
.pe_barwrite = vi_pci_write,
.pe_barread = vi_pci_read
.pe_barread = vi_pci_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = vi_pci_snapshot,
.pe_pause = vi_pci_pause,
.pe_resume = vi_pci_resume,
#endif
};
PCI_EMUL_SET(pci_de_vnet);

View file

@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include <pthread.h>
#include <unistd.h>
#include <machine/vmm_snapshot.h>
#include <dev/usb/usbdi.h>
#include <dev/usb/usb.h>
#include <dev/usb/usb_freebsd.h>
@ -151,6 +153,8 @@ static int xhci_debug = 0;
#define FIELD_COPY(a,b,m,s) (((a) & ~((m) << (s))) | \
(((b) & ((m) << (s)))))
#define SNAP_DEV_NAME_LEN 128
struct pci_xhci_trb_ring {
uint64_t ringaddr; /* current dequeue guest address */
uint32_t ccs; /* consumer cycle state */
@ -286,9 +290,10 @@ struct pci_xhci_softc {
#define XHCI_HALTED(sc) ((sc)->opregs.usbsts & XHCI_STS_HCH)
#define XHCI_GADDR_SIZE(a) (XHCI_PADDR_SZ - \
(((uint64_t) (a)) & (XHCI_PADDR_SZ - 1)))
#define XHCI_GADDR(sc,a) paddr_guest2host((sc)->xsc_pi->pi_vmctx, \
(a), \
XHCI_PADDR_SZ - ((a) & (XHCI_PADDR_SZ-1)))
(a), XHCI_GADDR_SIZE(a))
static int xhci_in_use;
@ -2855,12 +2860,265 @@ pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (error);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Build the slot -> device index map used when saving xHCI state.
 *
 * maps[] must have room for XHCI_MAX_SLOTS + 1 entries: slots are numbered
 * from 1, entry 0 is unused.  After the call, maps[i] holds the (1-based)
 * index of the device instance that slot i points to, or 0 when the slot is
 * empty or points to no known device instance.
 */
static void
pci_xhci_map_devs_slots(struct pci_xhci_softc *sc, int maps[])
{
	int i, j;
	struct pci_xhci_dev_emu *dev, *slot;

	/* Clear every entry that can be indexed, including maps[XHCI_MAX_SLOTS]. */
	memset(maps, 0, sizeof(maps[0]) * (XHCI_MAX_SLOTS + 1));

	for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
		slot = XHCI_SLOTDEV_PTR(sc, i);

		/*
		 * An empty slot must stay mapped to 0; without this check it
		 * would compare equal to any empty device entry.
		 */
		if (slot == NULL)
			continue;

		for (j = 1; j <= XHCI_MAX_DEVS; j++) {
			dev = XHCI_DEVINST_PTR(sc, j);

			if (slot == dev)
				maps[i] = j;
		}
	}
}
/*
 * Save or restore the transfer state of endpoint 'idx' of an xHCI device.
 *
 * The endpoint's ep_xfer pointer is written to the snapshot first and acts
 * as a presence flag: a NULL value means there is nothing more to process.
 * On restore, a non-NULL flag causes the endpoint to be re-initialised via
 * pci_xhci_init_ep() before its transfer blocks are read back.
 *
 * Returns 0 on success, ENOMEM when the request buffer cannot be allocated
 * on restore, or the error left by a failing SNAPSHOT_* macro.
 */
static int
pci_xhci_snapshot_ep(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev,
    int idx, struct vm_snapshot_meta *meta)
{
	int k;
	int ret;
	struct usb_data_xfer *xfer = NULL;
	struct usb_data_xfer_block *xfer_block;

	/* some sanity checks */
	if (meta->op == VM_SNAPSHOT_SAVE)
		xfer = dev->eps[idx].ep_xfer;

	SNAPSHOT_VAR_OR_LEAVE(xfer, meta, ret, done);
	if (xfer == NULL) {
		ret = 0;
		goto done;
	}

	/*
	 * The pointer read from the snapshot is only a marker; replace it
	 * with the freshly initialised transfer of this process.
	 */
	if (meta->op == VM_SNAPSHOT_RESTORE) {
		pci_xhci_init_ep(dev, idx);
		xfer = dev->eps[idx].ep_xfer;
	}

	/* save / restore proper */
	for (k = 0; k < USB_MAX_XFER_BLOCKS; k++) {
		xfer_block = &xfer->data[k];

		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(xfer_block->buf,
		    XHCI_GADDR_SIZE(xfer_block->buf), true, meta, ret,
		    done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->blen, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->bdone, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->processed, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->hci_data, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->ccs, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->streamid, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(xfer_block->trbnext, meta, ret, done);
	}

	SNAPSHOT_VAR_OR_LEAVE(xfer->ureq, meta, ret, done);
	if (xfer->ureq) {
		/* xfer->ureq is not allocated at restore time */
		if (meta->op == VM_SNAPSHOT_RESTORE) {
			xfer->ureq = malloc(sizeof(struct usb_device_request));
			if (xfer->ureq == NULL) {
				/* Do not copy into a NULL buffer below. */
				ret = ENOMEM;
				goto done;
			}
		}

		SNAPSHOT_BUF_OR_LEAVE(xfer->ureq,
		    sizeof(struct usb_device_request),
		    meta, ret, done);
	}

	SNAPSHOT_VAR_OR_LEAVE(xfer->ndata, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(xfer->head, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(xfer->tail, meta, ret, done);

done:
	return (ret);
}
/*
 * Save or restore the state of the emulated xHCI controller.
 *
 * meta->dev_data carries the pci_devinst of the controller.  The records
 * are processed in this fixed order:
 *   1. capability, operational and runtime registers;
 *   2. a sanity section with the index and emulation name of every
 *      configured device (on restore these must match the devices that
 *      were instantiated, otherwise EINVAL is returned);
 *   3. the port registers of every configured device;
 *   4. for each slot, the slot -> device mapping followed by the device
 *      context address, endpoint transfers, slot state, the USB device
 *      model's own state and the hci address/port;
 *   5. ndevices and the USB2/USB3 port start indices.
 *
 * Returns 0 on success or the first error encountered.
 */
static int
pci_xhci_snapshot(struct vm_snapshot_meta *meta)
{
	int i, j;
	int ret;
	int restore_idx = 0;
	struct pci_devinst *pi;
	struct pci_xhci_softc *sc;
	struct pci_xhci_portregs *port;
	struct pci_xhci_dev_emu *dev;
	char dname[SNAP_DEV_NAME_LEN];
	int maps[XHCI_MAX_SLOTS + 1];

	pi = meta->dev_data;
	sc = pi->pi_arg;

	SNAPSHOT_VAR_OR_LEAVE(sc->caplength, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams1, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams2, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams3, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->hccparams1, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->dboff, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsoff, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->hccparams2, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->regsend, meta, ret, done);

	/* opregs */
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbcmd, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbsts, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.pgsz, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dnctrl, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.crcr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dcbaap, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.config, meta, ret, done);

	/* opregs.cr_p */
	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.cr_p,
	    XHCI_GADDR_SIZE(sc->opregs.cr_p), false, meta, ret, done);

	/* opregs.dcbaa_p */
	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.dcbaa_p,
	    XHCI_GADDR_SIZE(sc->opregs.dcbaa_p), false, meta, ret, done);

	/* rtsregs */
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.mfindex, meta, ret, done);

	/* rtsregs.intrreg */
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.iman, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.imod, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstsz, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.rsvd, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstba, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erdp, meta, ret, done);

	/* rtsregs.erstba_p */
	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erstba_p,
	    XHCI_GADDR_SIZE(sc->rtsregs.erstba_p), false, meta, ret, done);

	/* rtsregs.erst_p */
	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erst_p,
	    XHCI_GADDR_SIZE(sc->rtsregs.erst_p), false, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_deq_seg, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_idx, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_seg, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_events_cnt, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.event_pcs, meta, ret, done);

	/* sanity checking */
	for (i = 1; i <= XHCI_MAX_DEVS; i++) {
		dev = XHCI_DEVINST_PTR(sc, i);
		if (dev == NULL)
			continue;

		if (meta->op == VM_SNAPSHOT_SAVE)
			restore_idx = i;
		SNAPSHOT_VAR_OR_LEAVE(restore_idx, meta, ret, done);

		/* check if the restored device (when restoring) is sane */
		if (restore_idx != i) {
			fprintf(stderr, "%s: idx not matching: actual: %d, "
			    "expected: %d\r\n", __func__, restore_idx, i);
			ret = EINVAL;
			goto done;
		}

		if (meta->op == VM_SNAPSHOT_SAVE) {
			/* Zero-fill so the record has no stale stack bytes. */
			memset(dname, 0, sizeof(dname));
			strncpy(dname, dev->dev_ue->ue_emu, sizeof(dname) - 1);
		}

		SNAPSHOT_BUF_OR_LEAVE(dname, sizeof(dname), meta, ret, done);

		if (meta->op == VM_SNAPSHOT_RESTORE) {
			/* Never trust the snapshot to be NUL-terminated. */
			dname[sizeof(dname) - 1] = '\0';
			if (strcmp(dev->dev_ue->ue_emu, dname)) {
				fprintf(stderr, "%s: device names mismatch: "
				    "actual: %s, expected: %s\r\n",
				    __func__, dname, dev->dev_ue->ue_emu);

				ret = EINVAL;
				goto done;
			}
		}
	}

	/* portregs */
	for (i = 1; i <= XHCI_MAX_DEVS; i++) {
		port = XHCI_PORTREG_PTR(sc, i);
		dev = XHCI_DEVINST_PTR(sc, i);

		if (dev == NULL)
			continue;

		SNAPSHOT_VAR_OR_LEAVE(port->portsc, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->portpmsc, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->portli, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(port->porthlpmc, meta, ret, done);
	}

	/* slots */
	if (meta->op == VM_SNAPSHOT_SAVE)
		pci_xhci_map_devs_slots(sc, maps);

	for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
		SNAPSHOT_VAR_OR_LEAVE(maps[i], meta, ret, done);

		if (meta->op == VM_SNAPSHOT_SAVE) {
			dev = XHCI_SLOTDEV_PTR(sc, i);
		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
			/* Re-link the slot to the device it pointed to. */
			if (maps[i] != 0)
				dev = XHCI_DEVINST_PTR(sc, maps[i]);
			else
				dev = NULL;

			XHCI_SLOTDEV_PTR(sc, i) = dev;
		} else {
			/* error */
			ret = EINVAL;
			goto done;
		}

		if (dev == NULL)
			continue;

		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(dev->dev_ctx,
		    XHCI_GADDR_SIZE(dev->dev_ctx), false, meta, ret, done);

		for (j = 1; j < XHCI_MAX_ENDPOINTS; j++) {
			ret = pci_xhci_snapshot_ep(sc, dev, j, meta);
			if (ret != 0)
				goto done;
		}

		SNAPSHOT_VAR_OR_LEAVE(dev->dev_slotstate, meta, ret, done);

		/*
		 * devices[i]->dev_sc
		 *
		 * A failure in the USB device model's own snapshot routine
		 * must abort the operation instead of being silently dropped.
		 */
		ret = dev->dev_ue->ue_snapshot(dev->dev_sc, meta);
		if (ret != 0)
			goto done;

		/* devices[i]->hci */
		SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_address, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_port, meta, ret, done);
	}

	SNAPSHOT_VAR_OR_LEAVE(sc->ndevices, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->usb2_port_start, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->usb3_port_start, meta, ret, done);

done:
	return (ret);
}
#endif
struct pci_devemu pci_de_xhci = {
.pe_emu = "xhci",
.pe_init = pci_xhci_init,
.pe_barwrite = pci_xhci_write,
.pe_barread = pci_xhci_read
.pe_barread = pci_xhci_read,
#ifdef BHYVE_SNAPSHOT
.pe_snapshot = pci_xhci_snapshot,
#endif
};
PCI_EMUL_SET(pci_de_xhci);

View file

@ -32,10 +32,13 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <machine/vmm_snapshot.h>
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@ -382,3 +385,17 @@ ps2kbd_init(struct atkbdc_softc *atkbdc_sc)
return (sc);
}
#ifdef BHYVE_SNAPSHOT
int
ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_OR_LEAVE(sc->enabled, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
done:
return (ret);
}
#endif

View file

@ -32,10 +32,15 @@
#define _PS2KBD_H_
struct atkbdc_softc;
struct vm_snapshot_meta;
struct ps2kbd_softc *ps2kbd_init(struct atkbdc_softc *sc);
int ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val);
void ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val);
#ifdef BHYVE_SNAPSHOT
int ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta);
#endif
#endif /* _PS2KBD_H_ */

View file

@ -32,10 +32,13 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <machine/vmm_snapshot.h>
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@ -416,4 +419,23 @@ ps2mouse_init(struct atkbdc_softc *atkbdc_sc)
return (sc);
}
#ifdef BHYVE_SNAPSHOT
int
ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_OR_LEAVE(sc->status, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->resolution, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->sampling_rate, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->ctrlenable, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->cur_x, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->cur_y, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->delta_x, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->delta_y, meta, ret, done);
done:
return (ret);
}
#endif

View file

@ -32,6 +32,7 @@
#define _PS2MOUSE_H_
struct atkbdc_softc;
struct vm_snapshot_meta;
struct ps2mouse_softc *ps2mouse_init(struct atkbdc_softc *sc);
@ -40,4 +41,8 @@ void ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val, int insert);
void ps2mouse_toggle(struct ps2mouse_softc *sc, int enable);
int ps2mouse_fifocnt(struct ps2mouse_softc *sc);
#ifdef BHYVE_SNAPSHOT
int ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta);
#endif
#endif /* _PS2MOUSE_H_ */

1742
usr.sbin/bhyve/snapshot.c Normal file

File diff suppressed because it is too large Load diff

105
usr.sbin/bhyve/snapshot.h Normal file
View file

@ -0,0 +1,105 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2016 Flavius Anton
* Copyright (c) 2016 Mihai Tiganus
* Copyright (c) 2016-2019 Mihai Carabas
* Copyright (c) 2017-2019 Darius Mihai
* Copyright (c) 2017-2019 Elena Mihailescu
* Copyright (c) 2018-2019 Sergiu Weisz
* All rights reserved.
* The bhyve-snapshot feature was developed under sponsorships
* from Matthew Grooms.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _BHYVE_SNAPSHOT_
#define _BHYVE_SNAPSHOT_
#include <machine/vmm_snapshot.h>
#include <libxo/xo.h>
#include <ucl.h>
struct vmctx;
/*
 * Bookkeeping used while restoring a guest from a snapshot.
 *
 * NOTE(review): the per-field descriptions are inferred from the field
 * names and from the commit description (one file holding guest RAM, one
 * holding the remaining guest state); confirm against snapshot.c.
 */
struct restore_state {
int kdata_fd; /* presumably fd of the non-memory state file -- confirm */
int vmmem_fd; /* presumably fd of the guest memory file -- confirm */
void *kdata_map; /* presumably a mapping of the kdata file -- confirm */
size_t kdata_len; /* length associated with kdata (bytes, presumably) */
size_t vmmem_len; /* length associated with vmmem (bytes, presumably) */
struct ucl_parser *meta_parser; /* UCL parser handle for snapshot metadata */
ucl_object_t *meta_root_obj; /* UCL object; by its name, the metadata root */
};
/*
 * Pairs a VM context with a socket descriptor.
 * NOTE(review): presumably the argument handed to checkpoint_thread();
 * confirm in snapshot.c.
 */
struct checkpoint_thread_info {
struct vmctx *ctx; /* VM context */
int socket_fd; /* socket descriptor (presumably the checkpoint control socket) */
};
/*
 * Callback signatures used by the per-device snapshot table below.
 * The snapshot callback receives only the snapshot metadata; the pause and
 * resume callbacks receive the VM context and a string (presumably the
 * device name -- confirm against the callers in snapshot.c).
 */
typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *);
typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *);
/* Describes one user-space device model that takes part in snapshots. */
struct vm_snapshot_dev_info {
const char *dev_name; /* device name */
vm_snapshot_dev_cb snapshot_cb; /* callback for device snapshot */
vm_pause_dev_cb pause_cb; /* callback for device pause */
vm_resume_dev_cb resume_cb; /* callback for device resume */
};
/* Associates a kernel structure name with the snapshot request type for it. */
struct vm_snapshot_kern_info {
const char *struct_name; /* kernel structure name */
enum snapshot_req req; /* request type */
};
/*
 * Restore-state helpers.  Each lookup_*() takes the restore state and
 * returns a single value; what it represents is given by the function name
 * (VM name, memory flags, memory size, guest vCPU count).
 * NOTE(review): where these values come from is not visible in this header;
 * see snapshot.c.
 */
void destroy_restore_state(struct restore_state *rstate);
const char *lookup_vmname(struct restore_state *rstate);
int lookup_memflags(struct restore_state *rstate);
size_t lookup_memsize(struct restore_state *rstate);
int lookup_guest_ncpus(struct restore_state *rstate);
/* Per-vCPU checkpoint hooks; each takes a vCPU index. */
void checkpoint_cpu_add(int vcpu);
void checkpoint_cpu_resume(int vcpu);
void checkpoint_cpu_suspend(int vcpu);
/* Restore entry points; each operates on a VM context and a restore state. */
int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate);
int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate);
int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate);
/* Pause / resume the user-space device models of a VM. */
int vm_pause_user_devs(struct vmctx *ctx);
int vm_resume_user_devs(struct vmctx *ctx);
/*
 * Checkpoint request handling.  checkpoint_thread() has the
 * void *(*)(void *) shape of a thread start routine.
 */
int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
void *checkpoint_thread(void *param);
int init_checkpoint_thread(struct vmctx *ctx);
/* Populate a restore state from the named file. */
int load_restore_file(const char *filename, struct restore_state *rstate);
#endif

View file

@ -39,6 +39,8 @@ __FBSDID("$FreeBSD$");
#include <capsicum_helpers.h>
#endif
#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@ -719,3 +721,35 @@ uart_set_backend(struct uart_softc *sc, const char *opts)
return (retval);
}
#ifdef BHYVE_SNAPSHOT
int
uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_OR_LEAVE(sc->data, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->ier, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->lcr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->mcr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->lsr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->msr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->fcr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->scr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->dll, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->dlh, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.rindex, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.windex, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.num, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.size, meta, ret, done);
SNAPSHOT_BUF_OR_LEAVE(sc->rxfifo.buf, sizeof(sc->rxfifo.buf),
meta, ret, done);
sc->thre_int_pending = 1;
done:
return (ret);
}
#endif

View file

@ -31,10 +31,10 @@
#ifndef _UART_EMUL_H_
#define _UART_EMUL_H_
#define UART_IO_BAR_SIZE 8
struct uart_softc;
struct vm_snapshot_meta;
typedef void (*uart_intr_func_t)(void *arg);
struct uart_softc *uart_init(uart_intr_func_t intr_assert,
@ -44,4 +44,7 @@ int uart_legacy_alloc(int unit, int *ioaddr, int *irq);
uint8_t uart_read(struct uart_softc *sc, int offset);
void uart_write(struct uart_softc *sc, int offset, uint8_t value);
int uart_set_backend(struct uart_softc *sc, const char *opt);
#ifdef BHYVE_SNAPSHOT
int uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta);
#endif
#endif

View file

@ -41,10 +41,10 @@
#define USB_XFER_IN 1
struct usb_hci;
struct usb_device_request;
struct usb_data_xfer;
struct vm_snapshot_meta;
/* Device emulation handlers */
struct usb_devemu {
@ -62,6 +62,7 @@ struct usb_devemu {
int (*ue_reset)(void *sc);
int (*ue_remove)(void *sc);
int (*ue_stop)(void *sc);
int (*ue_snapshot)(void *scarg, struct vm_snapshot_meta *meta);
};
#define USB_EMUL_SET(x) DATA_SET(usb_emu_set, x);
@ -148,7 +149,6 @@ enum USB_ERRCODE {
pthread_mutex_unlock(&((x)->mtx)); \
} while (0)
struct usb_devemu *usb_emu_finddev(char *name);
struct usb_data_xfer_block *usb_data_xfer_append(struct usb_data_xfer *xfer,

View file

@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <machine/vmm_snapshot.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@ -788,6 +790,29 @@ umouse_stop(void *scarg)
return (0);
}
#ifdef BHYVE_SNAPSHOT
static int
umouse_snapshot(void *scarg, struct vm_snapshot_meta *meta)
{
int ret;
struct umouse_softc *sc;
sc = scarg;
SNAPSHOT_VAR_OR_LEAVE(sc->um_report, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->newdata, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->hid.idle, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->hid.protocol, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->hid.feature, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->polling, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_sec, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_usec, meta, ret, done);
done:
return (ret);
}
#endif
struct usb_devemu ue_mouse = {
.ue_emu = "tablet",
@ -798,6 +823,9 @@ struct usb_devemu ue_mouse = {
.ue_data = umouse_data_handler,
.ue_reset = umouse_reset,
.ue_remove = umouse_remove,
.ue_stop = umouse_stop
.ue_stop = umouse_stop,
#ifdef BHYVE_SNAPSHOT
.ue_snapshot = umouse_snapshot,
#endif
};
USB_EMUL_SET(ue_mouse);

View file

@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/uio.h>
#include <machine/atomic.h>
#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdint.h>
@ -806,3 +807,150 @@ vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
if (vs->vs_mtx)
pthread_mutex_unlock(vs->vs_mtx);
}
#ifdef BHYVE_SNAPSHOT
/*
 * Pause a virtio PCI device by invoking the device-specific vc_pause
 * callback on the device softc.
 *
 * ctx is unused.  The callback is mandatory: the assertion fires if the
 * device's virtio_consts does not provide one.  Always returns 0.
 */
int
vi_pci_pause(struct vmctx *ctx, struct pci_devinst *pi)
{
	struct virtio_softc *vs;
	struct virtio_consts *vc;

	vs = pi->pi_arg;
	vc = vs->vs_vc;

	assert(vc->vc_pause != NULL);
	(*vc->vc_pause)(DEV_SOFTC(vs));

	return (0);
}
/*
 * Resume a virtio PCI device by invoking the device-specific vc_resume
 * callback on the device softc.
 *
 * ctx is unused.  The callback is mandatory: the assertion fires if the
 * device's virtio_consts does not provide one.  Always returns 0.
 */
int
vi_pci_resume(struct vmctx *ctx, struct pci_devinst *pi)
{
	struct virtio_softc *vs;
	struct virtio_consts *vc;

	vs = pi->pi_arg;
	vc = vs->vs_vc;

	assert(vc->vc_resume != NULL);
	(*vc->vc_resume)(DEV_SOFTC(vs));

	return (0);
}
static int
vi_pci_snapshot_softc(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_OR_LEAVE(vs->vs_flags, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vs->vs_negotiated_caps, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vs->vs_curq, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vs->vs_status, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vs->vs_isr, meta, ret, done);
SNAPSHOT_VAR_OR_LEAVE(vs->vs_msix_cfg_idx, meta, ret, done);
done:
return (ret);
}
static int
vi_pci_snapshot_consts(struct virtio_consts *vc, struct vm_snapshot_meta *meta)
{
int ret;
SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_nvq, meta, ret, done);
SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_cfgsize, meta, ret, done);
SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps, meta, ret, done);
done:
return (ret);
}
/*
 * Run the state of every virtqueue of a virtio device through the
 * snapshot macros.
 *
 * For each of the vc_nvq queues this handles the queue bookkeeping fields,
 * the three ring pointers (via the guest-to-host address macro) and finally
 * vring_size(vq_qsize) bytes starting at vq_desc.
 *
 * Returns 0 when the device has no queues or every step succeeds;
 * otherwise the result of the first failing step.
 */
static int
vi_pci_snapshot_queues(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
{
	int i;
	int ret;
	struct virtio_consts *vc;
	struct vqueue_info *vq;
	uint64_t addr_size;

	vc = vs->vs_vc;

	/*
	 * A device may have zero queues, in which case the loop body never
	 * assigns ret; start from success so the return value is defined.
	 */
	ret = 0;

	/* Save virtio queue info */
	for (i = 0; i < vc->vc_nvq; i++) {
		vq = &vs->vs_queues[i];

		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_qsize, meta, ret, done);
		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_num, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vq->vq_flags, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vq->vq_last_avail, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vq->vq_next_used, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done);

		/* Descriptor table: one struct virtio_desc per queue entry. */
		addr_size = vq->vq_qsize * sizeof(struct virtio_desc);
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size,
		    false, meta, ret, done);

		/* Available ring: 2 + qsize + 1 16-bit words. */
		addr_size = (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_avail, addr_size,
		    false, meta, ret, done);

		/*
		 * Used ring.
		 * NOTE(review): this length counts two 16-bit words per ring
		 * element; if a used-ring element is 8 bytes (as in the
		 * virtio specification) this under-counts -- confirm how the
		 * macro uses the length.
		 */
		addr_size = (2 + 2 * vq->vq_qsize + 1) * sizeof(uint16_t);
		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size,
		    false, meta, ret, done);

		/* Ring contents, starting at the descriptor table. */
		SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, vring_size(vq->vq_qsize),
		    meta, ret, done);
	}

done:
	return (ret);
}
int
vi_pci_snapshot(struct vm_snapshot_meta *meta)
{
int ret;
struct pci_devinst *pi;
struct virtio_softc *vs;
struct virtio_consts *vc;
pi = meta->dev_data;
vs = pi->pi_arg;
vc = vs->vs_vc;
/* Save virtio softc */
ret = vi_pci_snapshot_softc(vs, meta);
if (ret != 0)
goto done;
/* Save virtio consts */
ret = vi_pci_snapshot_consts(vc, meta);
if (ret != 0)
goto done;
/* Save virtio queue info */
ret = vi_pci_snapshot_queues(vs, meta);
if (ret != 0)
goto done;
/* Save device softc, if needed */
if (vc->vc_snapshot != NULL) {
ret = (*vc->vc_snapshot)(DEV_SOFTC(vs), meta);
if (ret != 0)
goto done;
}
done:
return (ret);
}
#endif

View file

@ -287,6 +287,7 @@ vring_size(u_int qsz)
struct vmctx;
struct pci_devinst;
struct vqueue_info;
struct vm_snapshot_meta;
/*
* A virtual device, with some number (possibly 0) of virtual
@ -361,6 +362,10 @@ struct virtio_consts {
void (*vc_apply_features)(void *, uint64_t);
/* called to apply negotiated features */
uint64_t vc_hv_caps; /* hypervisor-provided capabilities */
void (*vc_pause)(void *); /* called to pause device activity */
void (*vc_resume)(void *); /* called to resume device activity */
int (*vc_snapshot)(void *, struct vm_snapshot_meta *);
/* called to save / restore device state */
};
/*
@ -491,4 +496,9 @@ uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size);
void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value);
#ifdef BHYVE_SNAPSHOT
int vi_pci_snapshot(struct vm_snapshot_meta *meta);
int vi_pci_pause(struct vmctx *ctx, struct pci_devinst *pi);
int vi_pci_resume(struct vmctx *ctx, struct pci_devinst *pi);
#endif
#endif /* _VIRTIO_H_ */

View file

@ -2,6 +2,8 @@
# $FreeBSD$
#
.include <src.opts.mk>
PROG= bhyvectl
SRCS= bhyvectl.c
PACKAGE= bhyve
@ -14,4 +16,8 @@ WARNS?= 3
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
.if ${MK_BHYVE_SNAPSHOT} != "no"
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
.include <bsd.prog.mk>

View file

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd November 13, 2016
.Dd May 4, 2020
.Dt BHYVECTL 8
.Os
.Sh NAME
@ -39,6 +39,8 @@
.Op Fl -inject-nmi
.Op Fl -force-reset
.Op Fl -force-poweroff
.Op Fl -checkpoint= Ns Ar <filename>
.Op Fl -suspend= Ns Ar <filename>
.Sh DESCRIPTION
The
.Nm
@ -72,6 +74,17 @@ Inject a non-maskable interrupt (NMI) into the VM.
Force the VM to reset.
.It Fl -force-poweroff
Force the VM to power off.
.It Fl -checkpoint= Ns Ar <filename>
Save a snapshot of a virtual machine.
The guest memory contents are saved in the file given in
.Ar <filename> .
The guest device and vCPU state are saved in the file
.Ar <filename>.kern .
.It Fl -suspend= Ns Ar <filename>
Save a snapshot of a virtual machine similar to
.Fl -checkpoint .
The virtual machine will terminate after the snapshot has been
saved.
.El
.Sh EXIT STATUS
.Ex -std
@ -79,6 +92,10 @@ Force the VM to power off.
Destroy the VM called fbsd10:
.Pp
.Dl "bhyvectl --vm=fbsd10 --destroy"
.Sh COMPATIBILITY
The snapshot file format is not yet stable and is subject to future changes.
Backwards compatibility support for the current snapshot file format is not
guaranteed when future changes are made.
.Sh SEE ALSO
.Xr bhyve 8 ,
.Xr bhyveload 8

View file

@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm_dev.h>
#include <vmmapi.h>
#include <sys/socket.h>
#include <sys/un.h>
#include "amd/vmcb.h"
#include "intel/vmcs.h"
@ -67,6 +70,9 @@ __FBSDID("$FreeBSD$");
#define NO_ARG no_argument
#define OPT_ARG optional_argument
#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint"
#define MAX_VMNAME 100
static const char *progname;
static void
@ -78,6 +84,10 @@ usage(bool cpu_intel)
" [--cpu=<vcpu_number>]\n"
" [--create]\n"
" [--destroy]\n"
#ifdef BHYVE_SNAPSHOT
" [--checkpoint=<filename>]\n"
" [--suspend=<filename>]\n"
#endif
" [--get-all]\n"
" [--get-stats]\n"
" [--set-desc-ds]\n"
@ -287,6 +297,10 @@ enum x2apic_state x2apic_state;
static int unassign_pptdev, bus, slot, func;
static int run;
static int get_cpu_topology;
#ifdef BHYVE_SNAPSHOT
static int vm_checkpoint_opt;
static int vm_suspend_opt;
#endif
/*
* VMCB specific.
@ -591,6 +605,10 @@ enum {
SET_RTC_TIME,
SET_RTC_NVRAM,
RTC_NVRAM_OFFSET,
#ifdef BHYVE_SNAPSHOT
SET_CHECKPOINT_FILE,
SET_SUSPEND_FILE,
#endif
};
static void
@ -1461,6 +1479,10 @@ setup_options(bool cpu_intel)
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
{ "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
#ifdef BHYVE_SNAPSHOT
{ "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE},
{ "suspend", REQ_ARG, 0, SET_SUSPEND_FILE},
#endif
};
const struct option intel_opts[] = {
@ -1678,6 +1700,82 @@ show_memseg(struct vmctx *ctx)
}
}
#ifdef BHYVE_SNAPSHOT
/*
 * Send a checkpoint operation request to the bhyve process running a VM.
 *
 * Connects to the UNIX domain socket CHECKPOINT_RUN_DIR/<vmname> and writes
 * the whole *op structure to it.
 *
 * ctx:	VM context, used to obtain the VM name.
 * op:	request to transmit; sent as raw bytes (sizeof(*op)).
 *
 * Returns 0 on success.  On failure a diagnostic is printed to stderr and a
 * non-zero value is returned (-1, or the error from vm_get_name()).
 */
static int
send_checkpoint_op_req(struct vmctx *ctx, struct checkpoint_op *op)
{
	struct sockaddr_un addr;
	char vmname_buf[MAX_VMNAME];
	const char *buf;
	size_t len, total_sent;
	ssize_t len_sent;
	int socket_fd, path_len;
	int err = 0;

	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
	if (socket_fd < 0) {
		perror("Error creating bhyvectl socket");
		err = -1;
		goto done;
	}

	memset(&addr, 0, sizeof(struct sockaddr_un));
	addr.sun_family = AF_UNIX;

	/*
	 * Only MAX_VMNAME - 1 bytes are offered to vm_get_name(); zeroing
	 * the buffer first guarantees the name is always NUL-terminated.
	 */
	memset(vmname_buf, 0, sizeof(vmname_buf));
	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
	if (err != 0) {
		perror("Failed to get VM name");
		goto done;
	}

	/* Refuse a truncated path: it would name a different socket. */
	path_len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
	    CHECKPOINT_RUN_DIR, vmname_buf);
	if (path_len < 0 || (size_t)path_len >= sizeof(addr.sun_path)) {
		fprintf(stderr, "VM socket path is too long\n");
		err = -1;
		goto done;
	}

	if (connect(socket_fd, (struct sockaddr *)&addr,
	    sizeof(struct sockaddr_un)) != 0) {
		perror("Connect to VM socket failed");
		err = -1;
		goto done;
	}

	/* send() may accept fewer bytes than requested; loop until done. */
	buf = (const char *)op;
	len = sizeof(*op);
	total_sent = 0;
	while (total_sent < len) {
		len_sent = send(socket_fd, buf + total_sent,
		    len - total_sent, 0);
		if (len_sent < 0) {
			perror("Failed to send checkpoint operation request");
			err = -1;
			break;
		}
		if (len_sent == 0) {
			/* No progress: bail out instead of spinning. */
			fprintf(stderr, "Failed to send checkpoint operation "
			    "request: short write\n");
			err = -1;
			break;
		}
		total_sent += (size_t)len_sent;
	}

done:
	/* Descriptor 0 is valid, so test against the failure value only. */
	if (socket_fd >= 0)
		close(socket_fd);
	return (err);
}
/*
 * Build a checkpoint_op carrying the given opcode and snapshot file name
 * and send it to the VM's bhyve process.
 *
 * The structure is zeroed before it is filled so no uninitialized bytes
 * (padding, or the tail of the name buffer) are written to the socket.
 * A name that does not fit in MAX_SNAPSHOT_VMNAME bytes, terminator
 * included, is rejected rather than truncated, because a truncated name
 * would refer to a different file than the one the user asked for.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int
send_snapshot_op(struct vmctx *ctx, int opcode, const char *filename)
{
	struct checkpoint_op op;
	size_t len;

	len = strlen(filename);
	if (len >= (size_t)MAX_SNAPSHOT_VMNAME) {
		fprintf(stderr, "Snapshot filename is too long\n");
		return (-1);
	}

	memset(&op, 0, sizeof(op));
	op.op = opcode;
	memcpy(op.snapshot_filename, filename, len);

	return (send_checkpoint_op_req(ctx, &op));
}

/* Ask the VM to save a snapshot to checkpoint_file. */
static int
send_start_checkpoint(struct vmctx *ctx, const char *checkpoint_file)
{

	return (send_snapshot_op(ctx, START_CHECKPOINT, checkpoint_file));
}

/* Ask the VM to save a snapshot to suspend_file using START_SUSPEND. */
static int
send_start_suspend(struct vmctx *ctx, const char *suspend_file)
{

	return (send_snapshot_op(ctx, START_SUSPEND, suspend_file));
}
#endif
int
main(int argc, char *argv[])
{
@ -1694,6 +1792,9 @@ main(int argc, char *argv[])
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
struct tm tm;
struct option *opts;
#ifdef BHYVE_SNAPSHOT
char *checkpoint_file, *suspend_file;
#endif
cpu_intel = cpu_vendor_intel();
opts = setup_options(cpu_intel);
@ -1860,6 +1961,16 @@ main(int argc, char *argv[])
case ASSERT_LAPIC_LVT:
assert_lapic_lvt = atoi(optarg);
break;
#ifdef BHYVE_SNAPSHOT
case SET_CHECKPOINT_FILE:
vm_checkpoint_opt = 1;
checkpoint_file = optarg;
break;
case SET_SUSPEND_FILE:
vm_suspend_opt = 1;
suspend_file = optarg;
break;
#endif
default:
usage(cpu_intel);
}
@ -2345,6 +2456,14 @@ main(int argc, char *argv[])
if (!error && destroy)
vm_destroy(ctx);
#ifdef BHYVE_SNAPSHOT
if (!error && vm_checkpoint_opt)
error = send_start_checkpoint(ctx, checkpoint_file);
if (!error && vm_suspend_opt)
error = send_start_suspend(ctx, suspend_file);
#endif
free (opts);
exit(error);
}