Add the ability to control the CPU topology of created VMs

from userland without the need to use sysctls.  The old sysctls
continue to function, but are deprecated as of
FreeBSD_version 1200060 (Relnotes for deprecate).

The command line of bhyve is maintained in a backwards compatible way.
The API of libvmmapi is maintained in a backwards compatible way.
The sysctls are maintained in a backwards compatible way.

Added command option looks like:
bhyve -c [[cpus=]n][,sockets=n][,cores=n][,threads=n][,maxcpus=n]
The optional parts can be specified in any order, but only a single
integer invokes the backwards compatible parse.  [,maxcpus=n] is
hidden by #ifdef until kernel support is added, though the api
is put in place.

bhyvectl --get-cpu-topology option added.

Reviewed by:	grehan (maintainer, earlier version),
Reviewed by:	bcr (manpages)
Approved by:	bde (mentor), phk (mentor)
Tested by:	Oleg Ginzburg <olevole@olevole.ru> (cbsd)
MFC after:	1 week
Relnotes:	Y
Differential Revision:	https://reviews.freebsd.org/D9930
This commit is contained in:
Rodney W. Grimes 2018-04-08 19:24:49 +00:00
parent 92223bdded
commit 01d822d33b
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=332298
10 changed files with 277 additions and 32 deletions

View file

@ -1505,6 +1505,38 @@ vm_restart_instruction(void *arg, int vcpu)
return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
int
vm_set_topology(struct vmctx *ctx,
uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
{
struct vm_cpu_topology topology;
bzero(&topology, sizeof (struct vm_cpu_topology));
topology.sockets = sockets;
topology.cores = cores;
topology.threads = threads;
topology.maxcpus = maxcpus;
return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
}
int
vm_get_topology(struct vmctx *ctx,
uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
{
struct vm_cpu_topology topology;
int error;
bzero(&topology, sizeof (struct vm_cpu_topology));
error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
if (error == 0) {
*sockets = topology.sockets;
*cores = topology.cores;
*threads = topology.threads;
*maxcpus = topology.maxcpus;
}
return (error);
}
int
vm_get_device_fd(struct vmctx *ctx)
{
@ -1535,7 +1567,7 @@ vm_get_ioctls(size_t *len)
VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
VM_SET_INTINFO, VM_GET_INTINFO,
VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
VM_RESTART_INSTRUCTION };
VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
if (len == NULL) {
cmds = malloc(sizeof(vm_ioctl_cmds));

View file

@ -221,6 +221,12 @@ int vm_activate_cpu(struct vmctx *ctx, int vcpu);
int vm_suspend_cpu(struct vmctx *ctx, int vcpu);
int vm_resume_cpu(struct vmctx *ctx, int vcpu);
/*
 * CPU topology
 *
 * vm_set_topology() submits the given sockets/cores/threads/maxcpus values
 * via the VM_SET_TOPOLOGY ioctl; vm_get_topology() retrieves them via
 * VM_GET_TOPOLOGY.  Both return the result of the underlying ioctl.
 * vm_get_topology() only writes its output arguments when that result is 0.
 */
int vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus);
int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
/*
* FreeBSD specific APIs
*/

View file

@ -181,6 +181,10 @@ int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
/*
 * Per-VM CPU topology accessors.
 *
 * vm_get_topology() copies the VM's current sockets, cores, threads and
 * maxcpus values into the supplied locations and cannot fail.
 * vm_set_topology() returns 0 when the new values were stored, or EINVAL
 * when the request was rejected.
 */
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
uint16_t threads, uint16_t maxcpus);
/*
* APIs that modify the guest memory map require all vcpus to be frozen.

View file

@ -226,6 +226,13 @@ struct vm_rtc_data {
uint8_t value;
};
/*
 * Argument of the VM_SET_TOPOLOGY and VM_GET_TOPOLOGY ioctls.
 * The fields mirror the per-VM topology values kept by the kernel.
 */
struct vm_cpu_topology {
uint16_t sockets;	/* number of sockets */
uint16_t cores;		/* number of cores per socket */
uint16_t threads;	/* number of threads per core */
uint16_t maxcpus;	/* max pluggable cpus */
};
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@ -284,6 +291,10 @@ enum {
IOCNUM_GET_X2APIC_STATE = 61,
IOCNUM_GET_HPET_CAPABILITIES = 62,
/* CPU Topology */
IOCNUM_SET_TOPOLOGY = 63,
IOCNUM_GET_TOPOLOGY = 64,
/* legacy interrupt injection */
IOCNUM_ISA_ASSERT_IRQ = 80,
IOCNUM_ISA_DEASSERT_IRQ = 81,
@ -379,6 +390,10 @@ enum {
_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
#define VM_GET_HPET_CAPABILITIES \
_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
#define VM_SET_TOPOLOGY \
_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
#define VM_GET_TOPOLOGY \
_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
#define VM_GET_GPA_PMAP \
_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
#define VM_GLA2GPA \

View file

@ -166,6 +166,11 @@ struct vm {
struct vmspace *vmspace; /* (o) guest's address space */
char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
/* The following describe the vm cpu topology */
uint16_t sockets; /* (o) num of sockets */
uint16_t cores; /* (o) num of cores/socket */
uint16_t threads; /* (o) num of threads/core */
uint16_t maxcpus; /* (o) max pluggable cpus */
};
static int vmm_initialized;
@ -425,6 +430,12 @@ vm_init(struct vm *vm, bool create)
vcpu_init(vm, i, create);
}
/*
* The default CPU topology is a single thread per package.
*/
u_int cores_per_package = 1;
u_int threads_per_core = 1;
int
vm_create(const char *name, struct vm **retvm)
{
@ -450,12 +461,43 @@ vm_create(const char *name, struct vm **retvm)
vm->vmspace = vmspace;
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
vm->sockets = 1;
vm->cores = cores_per_package; /* XXX backwards compatibility */
vm->threads = threads_per_core; /* XXX backwards compatibility */
vm->maxcpus = 0; /* XXX not implemented */
vm_init(vm, true);
*retvm = vm;
return (0);
}
/*
 * Report the CPU topology currently recorded for 'vm'.
 *
 * Each of the four output arguments receives the corresponding field of
 * the VM; all four must point to valid storage.
 */
void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{

	*maxcpus = vm->maxcpus;
	*threads = vm->threads;
	*cores = vm->cores;
	*sockets = vm->sockets;
}
/*
 * Record a new CPU topology for 'vm'.
 *
 * Returns 0 and stores the values on success.  Returns EINVAL, leaving the
 * VM unchanged, when:
 *  - maxcpus is non-zero (not implemented yet),
 *  - any of sockets, cores or threads is zero (the cpuid emulation computes
 *    'cores - 1' and log2() of these values, so zero is never meaningful),
 *  - sockets * cores * threads exceeds VM_MAXCPU.
 */
int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	uint64_t ncpus;

	if (maxcpus != 0)
		return (EINVAL);	/* XXX remove when supported */
	if (sockets == 0 || cores == 0 || threads == 0)
		return (EINVAL);

	/*
	 * uint16_t operands are promoted to int, and the product of three
	 * 16-bit values does not fit in 32 bits (e.g. 2048 * 2048 * 1024
	 * wraps to 0), which would let an oversized topology slip past the
	 * limit check.  Do the multiplication in 64 bits instead.
	 */
	ncpus = (uint64_t)sockets * cores * threads;
	if (ncpus > VM_MAXCPU)
		return (EINVAL);

	/* XXX need to check sockets * cores * threads == vCPU, how? */
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	vm->maxcpus = maxcpus;
	return (0);
}
static void
vm_cleanup(struct vm *vm, bool destroy)
{

View file

@ -346,6 +346,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_rtc_time *rtctime;
struct vm_rtc_data *rtcdata;
struct vm_memmap *mm;
struct vm_cpu_topology *topology;
uint64_t *regvals;
int *regnums;
@ -737,6 +738,17 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_RESTART_INSTRUCTION:
error = vm_restart_instruction(sc->vm, vcpu);
break;
case VM_SET_TOPOLOGY:
topology = (struct vm_cpu_topology *)data;
error = vm_set_topology(sc->vm, topology->sockets,
topology->cores, topology->threads, topology->maxcpus);
break;
case VM_GET_TOPOLOGY:
topology = (struct vm_cpu_topology *)data;
vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
&topology->threads, &topology->maxcpus);
error = 0;
break;
default:
error = ENOTTY;
break;

View file

@ -60,16 +60,15 @@ static uint64_t bhyve_xcpuids;
SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
"Number of times an unknown cpuid leaf was accessed");
/*
* The default CPU topology is a single thread per package.
*/
static u_int threads_per_core = 1;
#if __FreeBSD_version < 1200060 /* Remove after 11 EOL helps MFCing */
extern u_int threads_per_core;
SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
&threads_per_core, 0, NULL);
static u_int cores_per_package = 1;
extern u_int cores_per_package;
SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
&cores_per_package, 0, NULL);
#endif
static int cpuid_leaf_b = 1;
SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
@ -95,6 +94,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
int error, enable_invpcid, level, width, x2apic_id;
unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
uint16_t cores, maxcpus, sockets, threads;
VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
@ -142,11 +142,11 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*
* However this matches the logical cpus as
* advertised by leaf 0x1 and will work even
* if the 'threads_per_core' tunable is set
* incorrectly on an AMD host.
* if threads is set incorrectly on an AMD host.
*/
logical_cpus = threads_per_core *
cores_per_package;
vm_get_topology(vm, &sockets, &cores, &threads,
&maxcpus);
logical_cpus = threads * cores;
regs[2] = logical_cpus - 1;
}
break;
@ -305,7 +305,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*/
regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
logical_cpus = threads_per_core * cores_per_package;
vm_get_topology(vm, &sockets, &cores, &threads,
&maxcpus);
logical_cpus = threads * cores;
regs[1] &= ~CPUID_HTT_CORES;
regs[1] |= (logical_cpus & 0xff) << 16;
regs[3] |= CPUID_HTT;
@ -315,8 +317,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
cpuid_count(*eax, *ecx, regs);
if (regs[0] || regs[1] || regs[2] || regs[3]) {
vm_get_topology(vm, &sockets, &cores, &threads,
&maxcpus);
regs[0] &= 0x3ff;
regs[0] |= (cores_per_package - 1) << 26;
regs[0] |= (cores - 1) << 26;
/*
* Cache topology:
* - L1 and L2 are shared only by the logical
@ -324,10 +328,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
* - L3 and above are shared by all logical
* processors in the package.
*/
logical_cpus = threads_per_core;
logical_cpus = threads;
level = (regs[0] >> 5) & 0x7;
if (level >= 3)
logical_cpus *= cores_per_package;
logical_cpus *= cores;
regs[0] |= (logical_cpus - 1) << 14;
}
break;
@ -389,16 +393,17 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Processor topology enumeration
*/
vm_get_topology(vm, &sockets, &cores, &threads,
&maxcpus);
if (*ecx == 0) {
logical_cpus = threads_per_core;
logical_cpus = threads;
width = log2(logical_cpus);
level = CPUID_TYPE_SMT;
x2apic_id = vcpu_id;
}
if (*ecx == 1) {
logical_cpus = threads_per_core *
cores_per_package;
logical_cpus = threads * cores;
width = log2(logical_cpus);
level = CPUID_TYPE_CORE;
x2apic_id = vcpu_id;

View file

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd June 2, 2017
.Dd April 6, 2018
.Dt BHYVE 8
.Os
.Sh NAME
@ -33,7 +33,16 @@
.Sh SYNOPSIS
.Nm
.Op Fl abehuwxACHPSWY
.Op Fl c Ar numcpus
.Oo
.Fl c\~ Ns
.Oo
.Op Ar cpus= Ns
.Ar numcpus Ns
.Oc Ns
.Op Ar ,sockets=n Ns
.Op Ar ,cores=n Ns
.Op Ar ,threads=n
.Oc
.Op Fl g Ar gdbport
.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
.Op Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t
@ -77,9 +86,30 @@ Enable a low-level console device supported by
kernels compiled with
.Cd "device bvmconsole" .
This option will be deprecated in a future version.
.It Fl c Ar numcpus
Number of guest virtual CPUs.
The default is 1 and the maximum is 16.
.It Fl c Op Ar setting ...
Number of guest virtual CPUs
and/or the CPU topology.
The default value for each of
.Ar numcpus ,
.Ar sockets ,
.Ar cores ,
and
.Ar threads
is 1.
The current maximum number of guest virtual CPUs is 16.
If
.Ar numcpus
is not specified then it will be calculated from the other arguments.
The topology must be consistent in that the
.Ar numcpus
must equal the product of
.Ar sockets ,
.Ar cores ,
and
.Ar threads .
If a
.Ar setting
is specified more than once the last one has precedence.
.It Fl C
Include guest memory in core file.
.It Fl e

View file

@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <pthread_np.h>
#include <sysexits.h>
#include <stdbool.h>
#include <stdint.h>
#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
@ -93,6 +94,8 @@ extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
char *vmname;
int guest_ncpus;
uint16_t cores, maxcpus, sockets, threads;
char *guest_uuid_str;
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
@ -137,11 +140,13 @@ usage(int code)
{
fprintf(stderr,
"Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
"Usage: %s [-abehuwxACHPSWY]\n"
" %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
" %*s [-g <gdb port>] [-l <lpc>]\n"
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
" -c: # cpus (default 1)\n"
" -c: number of cpus and/or topology specification"
" -C: include guest memory in core file\n"
" -e: exit on unhandled I/O access\n"
" -g: gdb port\n"
@ -159,11 +164,91 @@ usage(int code)
" -W: force virtio to use single-vector MSI\n"
" -x: local apic is in x2APIC mode\n"
" -Y: disable MPtable generation\n",
progname, (int)strlen(progname), "");
progname, (int)strlen(progname), "", (int)strlen(progname), "",
(int)strlen(progname), "");
exit(code);
}
/*
 * Parse the argument of the -c option:
 *
 *	[[cpus=]n][,sockets=n][,cores=n][,threads=n]
 *
 * On success the globals guest_ncpus, sockets, cores and threads are
 * updated and 0 is returned.  On a malformed or inconsistent specification,
 * or if the working copy of the string cannot be allocated, -1 is returned
 * and the globals are left untouched.
 *
 * XXX This parser is known to have the following issues:
 * 1.  It accepts null key=value tokens ",,".
 * 2.  It accepts whitespace after = and before value.
 * 3.  Values out of range of INT are silently wrapped.
 * 4.  It doesn't check non-final values.
 * 5.  The apparently bogus limits of UINT16_MAX are for future expansion.
 *
 * The acceptance of a null specification ('-c ""') is by design to match the
 * manual page syntax specification, this results in a topology of 1 vCPU.
 */
static int
topology_parse(const char *opt)
{
	uint64_t ncpus;
	int c, chk, n, s, t, tmp;
	char *cp, *str, *tofree;
	bool ns, scts;

	c = 1, n = 1, s = 1, t = 1;
	ns = false, scts = false;

	/*
	 * strsep() advances 'str' (and finally sets it to NULL), so keep the
	 * original allocation in 'tofree' to be able to release it.
	 */
	str = tofree = strdup(opt);
	if (str == NULL)
		return (-1);

	while ((cp = strsep(&str, ",")) != NULL) {
		if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
			n = tmp;
			ns = true;
		} else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) {
			n = tmp;
			ns = true;
		} else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) {
			s = tmp;
			scts = true;
		} else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) {
			c = tmp;
			scts = true;
		} else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) {
			t = tmp;
			scts = true;
#ifdef notyet  /* Do not expose this until vmm.ko implements it */
		/* NOTE: 'm' is not declared yet; add it when enabling this. */
		} else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) {
			m = tmp;
#endif
		/* Skip the empty argument case from -c "" */
		} else if (cp[0] == '\0') {
			continue;
		} else {
			goto bad;
		}
		/* Any trailing garbage causes an error */
		if (cp[chk] != '\0')
			goto bad;
	}
	free(tofree);
	tofree = NULL;

	/*
	 * Range check 1 <= n <= UINT16_MAX all values
	 */
	if (n < 1 || s < 1 || c < 1 || t < 1 ||
	    n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX ||
	    t > UINT16_MAX)
		return (-1);

	/* If only the cpus was specified, use that as sockets */
	if (!scts)
		s = n;
	/*
	 * Compute sockets * cores * threads avoiding overflow
	 * The range check above ensures these are 16 bit values
	 * If n was specified check it against computed ncpus
	 * (n >= 1 here, so the widening cast cannot change its value)
	 */
	ncpus = (uint64_t)s * c * t;
	if (ncpus > UINT16_MAX || (ns && (uint64_t)n != ncpus))
		return (-1);

	guest_ncpus = ncpus;
	sockets = s;
	cores = c;
	threads = t;
	return (0);

bad:
	free(tofree);
	return (-1);
}
static int
pincpu_parse(const char *opt)
{
@ -783,6 +868,9 @@ do_open(const char *vmname)
exit(1);
}
}
error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
if (error)
errx(EX_OSERR, "vm_set_topology");
return (ctx);
}
@ -801,6 +889,8 @@ main(int argc, char *argv[])
progname = basename(argv[0]);
gdb_port = 0;
guest_ncpus = 1;
sockets = cores = threads = 1;
maxcpus = 0;
memsize = 256 * MB;
mptgen = 1;
rtc_localtime = 1;
@ -825,7 +915,10 @@ main(int argc, char *argv[])
}
break;
case 'c':
guest_ncpus = atoi(optarg);
if (topology_parse(optarg) != 0) {
errx(EX_USAGE, "invalid cpu topology "
"'%s'", optarg);
}
break;
case 'C':
memflags |= VM_MEM_F_INCORE;
@ -903,11 +996,6 @@ main(int argc, char *argv[])
vmname = argv[0];
ctx = do_open(vmname);
if (guest_ncpus < 1) {
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
exit(1);
}
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",

View file

@ -191,7 +191,8 @@ usage(bool cpu_intel)
" [--get-msr-bitmap]\n"
" [--get-msr-bitmap-address]\n"
" [--get-guest-sysenter]\n"
" [--get-exit-reason]\n",
" [--get-exit-reason]\n"
" [--get-cpu-topology]\n",
progname);
if (cpu_intel) {
@ -285,6 +286,7 @@ static int set_x2apic_state, get_x2apic_state;
enum x2apic_state x2apic_state;
static int unassign_pptdev, bus, slot, func;
static int run;
static int get_cpu_topology;
/*
* VMCB specific.
@ -1456,6 +1458,7 @@ setup_options(bool cpu_intel)
{ "get-active-cpus", NO_ARG, &get_active_cpus, 1 },
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
{ "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
};
const struct option intel_opts[] = {
@ -2312,6 +2315,14 @@ main(int argc, char *argv[])
}
}
if (!error && (get_cpu_topology || get_all)) {
	uint16_t sockets, cores, threads, maxcpus;

	/*
	 * vm_get_topology() only fills in its output arguments when the
	 * ioctl succeeds, so record its result and print nothing on
	 * failure rather than dumping uninitialized stack values.
	 */
	error = vm_get_topology(ctx, &sockets, &cores, &threads,
	    &maxcpus);
	if (error == 0) {
		printf("cpu_topology:\tsockets=%hu, cores=%hu, threads=%hu, "
		    "maxcpus=%hu\n", sockets, cores, threads, maxcpus);
	}
}
if (!error && run) {
error = vm_run(ctx, vcpu, &vmexit);
if (error == 0)