Merge pull request #31076 from CodethinkLabs/vmspawn/directory_image_support

[vmspawn] directory type image support
This commit is contained in:
Lennart Poettering 2024-02-12 12:20:16 +01:00 committed by GitHub
commit a66ab6d801
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 507 additions and 71 deletions

View file

@ -69,6 +69,20 @@
<refsect2>
<title>Image Options</title>
<varlistentry>
<term><option>-D</option></term>
<term><option>--directory=</option></term>
<listitem><para>Directory to use as file system root for the virtual machine.</para>
<para>Either <option>--directory=</option> or <option>--image=</option> must be specified.</para>
<para>Note: If mounting a non-root owned directory you may require <option>--private-users=</option>
to map into the user's subuid namespace.</para>
<xi:include href="version-info.xml" xpointer="v256"/>
</listitem>
</varlistentry>
<variablelist>
<varlistentry>
<term><option>-i</option></term>
@ -248,8 +262,56 @@
</listitem>
</varlistentry>
</variablelist>
</refsect2>
</refsect2><refsect2>
<refsect2>
<title>User Namespacing Options</title>
<variablelist>
<varlistentry>
<term><option>--private-users=</option><replaceable>UID_SHIFT[:UID_RANGE]</replaceable></term>
<listitem><para>Controls user namespacing under <option>--directory=</option>.
If enabled, <citerefentry project='man-pages'><refentrytitle>virtiofsd</refentrytitle><manvolnum>1</manvolnum></citerefentry>
is instructed to map user and group ids (UIDs and GIDs). This involves mapping the private UIDs/GIDs used in the virtual machine
(starting with the virtual machine's root user 0 and up) to a range of UIDs/GIDs on the host that are not used for other
purposes (usually in the range beyond the host's UID/GID 65536).</para>
<para>If one or two colon-separated numbers are specified, user namespacing is turned on. <replaceable>UID_SHIFT</replaceable>
specifies the first host UID/GID to map; <replaceable>UID_RANGE</replaceable> is optional and specifies the number of host
UIDs/GIDs to assign to the virtual machine. If <replaceable>UID_RANGE</replaceable> is omitted, 65536 UIDs/GIDs are assigned.</para>
<para>When user namespaces are used, the GID range assigned to each virtual machine is always chosen identical to the
UID range.</para>
<xi:include href="version-info.xml" xpointer="v256"/></listitem>
</varlistentry>
</variablelist>
</refsect2>
<refsect2>
<title>Mount Options</title>
<variablelist>
<varlistentry>
<term><option>--bind=</option><replaceable>PATH</replaceable></term>
<term><option>--bind-ro=</option><replaceable>PATH</replaceable></term>
<listitem><para>Mount a directory from the host into the virtual machine. Takes one of: a path
argument — in which case the specified path will be mounted from the host to the same path in the virtual machine, or
a colon-separated pair of paths — in which case the first specified path is the source in the host, and the
second path is the destination in the virtual machine. If the source path is not absolute, it is resolved
relative to the current working directory. The <option>--bind-ro=</option> option creates read-only bind mounts.
Backslash escapes are interpreted, so <literal>\:</literal> may be used to embed colons in either path.
This option may be specified multiple times for creating multiple independent bind mount points.</para>
<xi:include href="version-info.xml" xpointer="v256"/></listitem>
</varlistentry>
</variablelist>
</refsect2>
<refsect2>
<title>Credentials</title>
<variablelist>

View file

@ -11,6 +11,7 @@
#include "missing_magic.h"
#include "missing_sched.h"
#include "namespace-util.h"
#include "parse-util.h"
#include "process-util.h"
#include "stat-util.h"
#include "stdio-util.h"
@ -305,3 +306,41 @@ int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
return stat_inode_same(&ns_st1, &ns_st2);
}
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
_cleanup_free_ char *buffer = NULL;
const char *range, *shift;
int r;
uid_t uid_shift, uid_range = 65536;
assert(s);
range = strchr(s, ':');
if (range) {
buffer = strndup(s, range - s);
if (!buffer)
return log_oom();
shift = buffer;
range++;
r = safe_atou32(range, &uid_range);
if (r < 0)
return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
} else
shift = s;
r = parse_uid(shift, &uid_shift);
if (r < 0)
return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
if (!userns_shift_range_valid(uid_shift, uid_range))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
if (ret_uid_shift)
*ret_uid_shift = uid_shift;
if (ret_uid_range)
*ret_uid_range = uid_range;
return 0;
}

View file

@ -53,3 +53,5 @@ static inline bool userns_shift_range_valid(uid_t shift, uid_t range) {
int userns_acquire(const char *uid_map, const char *gid_map);
int netns_acquire(void);
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type);
/* Parses "SHIFT[:RANGE]" into a UID shift and range (range defaults to 65536 if omitted). Either return
 * parameter may be NULL. Returns 0 on success, negative errno-style value on failure (logged by the callee). */
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range);

View file

@ -1255,33 +1255,11 @@ static int parse_argv(int argc, char *argv[]) {
arg_uid_shift = 0;
arg_uid_range = UINT32_C(0x10000);
} else {
_cleanup_free_ char *buffer = NULL;
const char *range, *shift;
/* anything else: User namespacing on, UID range is explicitly configured */
range = strchr(optarg, ':');
if (range) {
buffer = strndup(optarg, range - optarg);
if (!buffer)
return log_oom();
shift = buffer;
range++;
r = safe_atou32(range, &arg_uid_range);
if (r < 0)
return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
} else
shift = optarg;
r = parse_uid(shift, &arg_uid_shift);
r = parse_userns_uid_range(optarg, &arg_uid_shift, &arg_uid_range);
if (r < 0)
return log_error_errno(r, "Failed to parse UID \"%s\": %m", optarg);
return r;
arg_userns_mode = USER_NAMESPACE_FIXED;
if (!userns_shift_range_valid(arg_uid_shift, arg_uid_range))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
}
arg_settings_mask |= SETTING_USERNS;

View file

@ -4,6 +4,7 @@ libvmspawn_core_sources = files(
'vmspawn-settings.c',
'vmspawn-util.c',
'vmspawn-scope.c',
'vmspawn-mount.c',
)
libvmspawn_core = static_library(
'vmspawn-core',

View file

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "alloc-util.h"
#include "extract-word.h"
#include "macro.h"
#include "parse-argument.h"
#include "path-util.h"
#include "string-util.h"
#include "vmspawn-mount.h"
/* Releases the strings owned by a single mount entry and resets both pointers to NULL. The entry
 * itself is not freed. */
static void runtime_mount_done(RuntimeMount *mount) {
        assert(mount);

        free(mount->source);
        mount->source = NULL;

        free(mount->target);
        mount->target = NULL;
}
/* Releases every mount entry stored in the context as well as the array holding them.
 *
 * The context is left empty (mounts == NULL, n_mounts == 0), so that calling this function again, or
 * reusing the context afterwards, is safe. The context structure itself is not freed. */
void runtime_mount_context_done(RuntimeMountContext *ctx) {
        assert(ctx);

        FOREACH_ARRAY(mount, ctx->mounts, ctx->n_mounts)
                runtime_mount_done(mount);

        /* Reset the bookkeeping too, mirroring how runtime_mount_done() NULLs what it frees. */
        ctx->mounts = mfree(ctx->mounts);
        ctx->n_mounts = 0;
}
/* Parses a bind mount specification of the form "SOURCE[:TARGET]" and appends it to the context.
 *
 * SOURCE is made absolute relative to the current working directory and must refer to a directory
 * (symlinks are followed). TARGET must be an absolute path. If TARGET is omitted, SOURCE as given by
 * the caller is used as the target, hence it has to be absolute in that case.
 *
 * Returns 0 on success. On failure a negative errno-style value is returned and the context is left
 * unmodified:
 *   -EINVAL   if SOURCE is missing or empty, or the target path is not absolute
 *   -ENOTDIR  if SOURCE exists but is not a directory
 *   -ENOMEM   on allocation failure
 * or whatever extract_first_word(), path_make_absolute_cwd() or is_dir() report.
 *
 * This function does not log; the caller is expected to report the error. */
int runtime_mount_parse(RuntimeMountContext *ctx, const char *s, bool read_only) {
        _cleanup_(runtime_mount_done) RuntimeMount mount = { .read_only = read_only };
        _cleanup_free_ char *source_rel = NULL;
        int r;

        assert(ctx);

        /* Split off the source; afterwards 's' points to the remainder (the target), or is NULL. */
        r = extract_first_word(&s, &source_rel, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        if (isempty(source_rel))
                return -EINVAL;

        r = path_make_absolute_cwd(source_rel, &mount.source);
        if (r < 0)
                return r;

        /* virtiofsd only supports directories */
        r = is_dir(mount.source, /* follow= */ true);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTDIR;

        mount.target = s ? strdup(s) : TAKE_PTR(source_rel);
        if (!mount.target)
                return -ENOMEM;

        if (!path_is_absolute(mount.target))
                return -EINVAL;

        /* Return the error silently like every other failure path here: the caller logs it, so
         * logging here as well would report the same out-of-memory condition twice. */
        if (!GREEDY_REALLOC(ctx->mounts, ctx->n_mounts + 1))
                return -ENOMEM;

        ctx->mounts[ctx->n_mounts++] = TAKE_STRUCT(mount);

        return 0;
}

View file

@ -0,0 +1,19 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include <stdbool.h>
#include <stddef.h>
/* A single host directory to be made available inside the virtual machine. Both strings are owned by
 * this structure. */
typedef struct RuntimeMount {
/* Whether the mount is to be read-only (as requested via --bind-ro=) */
bool read_only;
/* Path of the directory on the host; made absolute by runtime_mount_parse() */
char *source;
/* Absolute path of the mount point inside the virtual machine */
char *target;
} RuntimeMount;
/* A growable list of RuntimeMount entries, filled by runtime_mount_parse() and released by
 * runtime_mount_context_done(). */
typedef struct RuntimeMountContext {
/* Array of mount entries; NULL while the list is empty */
RuntimeMount *mounts;
/* Number of valid entries in 'mounts' */
size_t n_mounts;
} RuntimeMountContext;
/* Frees all mount entries of the context and the array holding them. */
void runtime_mount_context_done(RuntimeMountContext *ctx);
/* Parses a "SOURCE[:TARGET]" specification and appends it to the context. Returns 0 on success, a
 * negative errno-style value on failure. */
int runtime_mount_parse(RuntimeMountContext *ctx, const char *s, bool read_only);

View file

@ -5,6 +5,7 @@
typedef enum SettingsMask {
SETTING_START_MODE = UINT64_C(1) << 0,
SETTING_BIND_MOUNTS = UINT64_C(1) << 11,
SETTING_DIRECTORY = UINT64_C(1) << 26,
SETTING_CREDENTIALS = UINT64_C(1) << 30,
_SETTING_FORCE_ENUM_WIDTH = UINT64_MAX

View file

@ -16,6 +16,12 @@
#define ARCHITECTURE_SUPPORTS_TPM 0
#endif
/* Whether the "smm=" property may be added to the QEMU machine specification. Only enabled when
 * building for x86 (32-bit or 64-bit). */
#if defined(__x86_64__) || defined(__i386__)
#define ARCHITECTURE_SUPPORTS_SMM 1
#else
#define ARCHITECTURE_SUPPORTS_SMM 0
#endif
#if defined(__arm__) || defined(__aarch64__)
#define DEFAULT_SERIAL_TTY "ttyAMA0"
#elif defined(__s390__) || defined(__s390x__)
@ -26,6 +32,18 @@
#define DEFAULT_SERIAL_TTY "ttyS0"
#endif
/* Name of the QEMU machine type to use for the architecture we are built for. Compilation fails on
 * architectures that have no entry here. */
#if defined(__x86_64__) || defined(__i386__)
#define QEMU_MACHINE_TYPE "q35"
#elif defined(__arm__) || defined(__aarch64__)
#define QEMU_MACHINE_TYPE "virt"
#elif defined(__s390__) || defined(__s390x__)
#define QEMU_MACHINE_TYPE "s390-ccw-virtio"
#elif defined(__powerpc__) || defined(__powerpc64__)
#define QEMU_MACHINE_TYPE "pseries"
#else
#error "No qemu machine defined for this architecture"
#endif
typedef struct OvmfConfig {
char *path;
char *format;

View file

@ -19,6 +19,7 @@
#include "dissect-image.h"
#include "escape.h"
#include "event-util.h"
#include "extract-word.h"
#include "fileio.h"
#include "format-util.h"
#include "fs-util.h"
@ -38,6 +39,7 @@
#include "path-util.h"
#include "pretty-print.h"
#include "process-util.h"
#include "random-util.h"
#include "rm-rf.h"
#include "signal-util.h"
#include "socket-util.h"
@ -45,12 +47,14 @@
#include "strv.h"
#include "tmpfile-util.h"
#include "unit-name.h"
#include "vmspawn-mount.h"
#include "vmspawn-scope.h"
#include "vmspawn-settings.h"
#include "vmspawn-util.h"
static bool arg_quiet = false;
static PagerFlags arg_pager_flags = 0;
static char *arg_directory = NULL;
static char *arg_image = NULL;
static char *arg_machine = NULL;
static char *arg_qemu_smp = NULL;
@ -65,6 +69,8 @@ static bool arg_qemu_gui = false;
static QemuNetworkStack arg_network_stack = QEMU_NET_NONE;
static int arg_secure_boot = -1;
static MachineCredentialContext arg_credentials = {};
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
static RuntimeMountContext arg_runtime_mounts = {};
static SettingsMask arg_settings_mask = 0;
static char *arg_firmware = NULL;
static char *arg_runtime_directory = NULL;
@ -72,6 +78,7 @@ static bool arg_runtime_directory_created = false;
static bool arg_privileged = false;
static char **arg_kernel_cmdline_extra = NULL;
STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep);
@ -80,6 +87,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_credentials, machine_credential_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_firmware, freep);
STATIC_DESTRUCTOR_REGISTER(arg_linux, freep);
STATIC_DESTRUCTOR_REGISTER(arg_initrd, freep);
STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep);
static int help(void) {
@ -99,6 +107,7 @@ static int help(void) {
" -q --quiet Do not show status information\n"
" --no-pager Do not pipe output into a pager\n"
"\n%3$sImage:%4$s\n"
" -D --directory=PATH Root directory for the container\n"
" -i --image=PATH Root file system disk image (or device node) for\n"
" the virtual machine\n"
"\n%3$sHost Configuration:%4$s\n"
@ -118,6 +127,16 @@ static int help(void) {
" --firmware=PATH|list Select firmware definition file (or list available)\n"
"\n%3$sSystem Identity:%4$s\n"
" -M --machine=NAME Set the machine name for the virtual machine\n"
"\n%3$sUser Namespacing:%4$s\n"
" --private-users=UIDBASE[:NUIDS]\n"
" Configure the UID/GID range to map into the\n"
" virtiofsd namespace\n"
"\n%3$sMounts:%4$s\n"
" --bind=SOURCE[:TARGET]\n"
" Mount a file or directory from the host into\n"
" the VM.\n"
" --bind-ro=SOURCE[:TARGET]\n"
" Similar, but creates a read-only mount\n"
"\n%3$sCredentials:%4$s\n"
" --set-credential=ID:VALUE\n"
" Pass a credential with literal value to the\n"
@ -150,7 +169,10 @@ static int parse_argv(int argc, char *argv[]) {
ARG_INITRD,
ARG_QEMU_GUI,
ARG_NETWORK_USER_MODE,
ARG_BIND,
ARG_BIND_RO,
ARG_SECURE_BOOT,
ARG_PRIVATE_USERS,
ARG_SET_CREDENTIAL,
ARG_LOAD_CREDENTIAL,
ARG_FIRMWARE,
@ -162,6 +184,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "quiet", no_argument, NULL, 'q' },
{ "no-pager", no_argument, NULL, ARG_NO_PAGER },
{ "image", required_argument, NULL, 'i' },
{ "directory", required_argument, NULL, 'D' },
{ "machine", required_argument, NULL, 'M' },
{ "qemu-smp", required_argument, NULL, ARG_QEMU_SMP },
{ "qemu-mem", required_argument, NULL, ARG_QEMU_MEM },
@ -174,7 +197,10 @@ static int parse_argv(int argc, char *argv[]) {
{ "qemu-gui", no_argument, NULL, ARG_QEMU_GUI },
{ "network-tap", no_argument, NULL, 'n' },
{ "network-user-mode", no_argument, NULL, ARG_NETWORK_USER_MODE },
{ "bind", required_argument, NULL, ARG_BIND },
{ "bind-ro", required_argument, NULL, ARG_BIND_RO },
{ "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
{ "private-users", required_argument, NULL, ARG_PRIVATE_USERS },
{ "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
{ "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
{ "firmware", required_argument, NULL, ARG_FIRMWARE },
@ -187,7 +213,7 @@ static int parse_argv(int argc, char *argv[]) {
assert(argv);
optind = 0;
while ((c = getopt_long(argc, argv, "+hi:M:nq", options, NULL)) >= 0)
while ((c = getopt_long(argc, argv, "+hD:i:M:nq", options, NULL)) >= 0)
switch (c) {
case 'h':
return help();
@ -199,6 +225,14 @@ static int parse_argv(int argc, char *argv[]) {
arg_quiet = true;
break;
case 'D':
r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_directory);
if (r < 0)
return r;
arg_settings_mask |= SETTING_DIRECTORY;
break;
case 'i':
r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
if (r < 0)
@ -296,12 +330,27 @@ static int parse_argv(int argc, char *argv[]) {
arg_network_stack = QEMU_NET_USER;
break;
case ARG_BIND:
case ARG_BIND_RO:
r = runtime_mount_parse(&arg_runtime_mounts, optarg, c == ARG_BIND_RO);
if (r < 0)
return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
arg_settings_mask |= SETTING_BIND_MOUNTS;
break;
case ARG_SECURE_BOOT:
r = parse_tristate(optarg, &arg_secure_boot);
if (r < 0)
return log_error_errno(r, "Failed to parse --secure-boot=%s: %m", optarg);
break;
case ARG_PRIVATE_USERS:
r = parse_userns_uid_range(optarg, &arg_uid_shift, &arg_uid_range);
if (r < 0)
return r;
break;
case ARG_SET_CREDENTIAL: {
r = machine_credential_set(&arg_credentials, optarg);
if (r < 0)
@ -629,6 +678,112 @@ static int discover_root(char **ret) {
return log_oom();
*ret = TAKE_PTR(root);
return 0;
}
static int find_virtiofsd(char **ret) {
int r;
_cleanup_free_ char *virtiofsd = NULL;
assert(ret);
r = find_executable("virtiofsd", &virtiofsd);
if (r < 0 && r != -ENOENT)
return log_error_errno(r, "Error while searching for virtiofsd: %m");
if (!virtiofsd) {
FOREACH_STRING(file, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
if (access(file, X_OK) >= 0) {
virtiofsd = strdup(file);
if (!virtiofsd)
return log_oom();
break;
}
if (!IN_SET(errno, ENOENT, EACCES))
return log_error_errno(errno, "Error while searching for virtiofsd: %m");
}
}
if (!virtiofsd)
return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to find virtiofsd binary.");
*ret = TAKE_PTR(virtiofsd);
return 0;
}
/* Starts a virtiofsd instance exporting 'directory', via start_socket_service_pair().
 *
 * The unit name prefix is derived from 'scope' plus a per-process instance counter. The instance listens
 * on a randomly named socket inside a per-instance state directory below arg_runtime_directory.
 *
 * If 'uidmap' is true and a UID shift was configured (arg_uid_shift != UID_INVALID), virtiofsd is
 * additionally passed --uid-map/--gid-map arguments built from arg_uid_shift and arg_uid_range.
 *
 * On success returns 0, stores the state directory path in *ret_state_tempdir and the socket file name
 * (relative to that directory) in *ret_sock_name; both are newly allocated and owned by the caller.
 * On failure returns a negative errno-style value. */
static int start_virtiofsd(sd_bus *bus, const char *scope, const char *directory, bool uidmap, char **ret_state_tempdir, char **ret_sock_name) {
        _cleanup_(rm_rf_physical_and_freep) char *state_directory = NULL;
        _cleanup_free_ char *virtiofsd_path = NULL, *socket_name = NULL, *unit_prefix = NULL;
        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
                .socket_type = SOCK_STREAM,
        };
        static unsigned n_started = 0;
        int r;

        assert(bus);
        assert(scope);
        assert(directory);
        assert(ret_state_tempdir);
        assert(ret_sock_name);

        r = find_virtiofsd(&virtiofsd_path);
        if (r < 0)
                return r;

        r = unit_name_to_prefix(scope, &unit_prefix);
        if (r < 0)
                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");

        /* Every instance started by this process gets its own number, so that unit names are unique. */
        if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", unit_prefix, n_started++) < 0)
                return log_oom();

        state_directory = path_join(arg_runtime_directory, ssp.unit_name_prefix);
        if (!state_directory)
                return log_oom();

        if (arg_runtime_directory_created) {
                ssp.runtime_directory = strjoin("systemd/vmspawn/", ssp.unit_name_prefix);
                if (!ssp.runtime_directory)
                        return log_oom();
        }

        if (asprintf(&socket_name, "sock-%"PRIx64, random_u64()) < 0)
                return log_oom();

        ssp.listen_address = path_join(state_directory, socket_name);
        if (!ssp.listen_address)
                return log_oom();

        /* QEMU doesn't support submounts so don't announce them */
        ssp.exec_start = strv_new(virtiofsd_path, "--shared-dir", directory, "--xattr", "--fd", "3", "--no-announce-submounts");
        if (!ssp.exec_start)
                return log_oom();

        if (uidmap && arg_uid_shift != UID_INVALID) {
                /* Append the four mapping arguments in order, stopping at the first failure. */
                r = strv_extend(&ssp.exec_start, "--uid-map");
                if (r >= 0)
                        r = strv_extendf(&ssp.exec_start, ":0:" UID_FMT ":" UID_FMT ":", arg_uid_shift, arg_uid_range);
                if (r >= 0)
                        r = strv_extend(&ssp.exec_start, "--gid-map");
                if (r >= 0)
                        r = strv_extendf(&ssp.exec_start, ":0:" GID_FMT ":" GID_FMT ":", arg_uid_shift, arg_uid_range);
                if (r < 0)
                        return log_oom();
        }

        r = start_socket_service_pair(bus, scope, &ssp);
        if (r < 0)
                return r;

        /* Success: hand ownership of the state directory and socket name to the caller. */
        *ret_state_tempdir = TAKE_PTR(state_directory);
        *ret_sock_name = TAKE_PTR(socket_name);

        return 0;
}
@ -658,12 +813,12 @@ static int kernel_cmdline_maybe_append_root(void) {
static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
_cleanup_free_ char *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL;
_cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL, *kernel = NULL;
_cleanup_close_ int notify_sock_fd = -EBADF;
_cleanup_strv_free_ char **cmdline = NULL;
_cleanup_free_ int *pass_fds = NULL;
size_t n_pass_fds = 0;
const char *machine, *accel;
const char *accel, *shm;
int r;
if (arg_privileged)
@ -697,18 +852,28 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
"falling back to OVMF firmware blobs without Secure Boot support.");
shm = arg_directory ? ",memory-backend=mem" : "";
if (ARCHITECTURE_SUPPORTS_SMM)
machine = strjoin("type=" QEMU_MACHINE_TYPE ",smm=", on_off(ovmf_config->supports_sb), shm);
else
machine = strjoin("type=" QEMU_MACHINE_TYPE, shm);
if (!machine)
return log_oom();
if (arg_linux) {
kernel = strdup(arg_linux);
if (!kernel)
return log_oom();
} else if (arg_directory)
return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Please specify a kernel (--linux=) when using -D/--directory=, refusing.");
r = find_qemu_binary(&qemu_binary);
if (r == -EOPNOTSUPP)
return log_error_errno(r, "Native architecture is not supported by qemu.");
if (r < 0)
return log_error_errno(r, "Failed to find QEMU binary: %m");
if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
machine = "type=virt";
else
machine = ovmf_config->supports_sb ? "type=q35,smm=on" : "type=q35,smm=off";
if (asprintf(&mem, "%" PRIu64, DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0)
if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0)
return log_oom();
cmdline = strv_new(
@ -723,7 +888,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
return log_oom();
/* if we are going to be starting any units with state then create our runtime dir */
if (arg_tpm != 0) {
if (arg_tpm != 0 || arg_directory || arg_runtime_mounts.n_mounts != 0) {
r = runtime_directory(&arg_runtime_directory, arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, "systemd/vmspawn");
if (r < 0)
return log_error_errno(r, "Failed to lookup runtime directory: %m");
@ -745,6 +910,26 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return log_oom();
/* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
if (arg_directory || arg_runtime_mounts.n_mounts != 0) {
r = strv_extend(&cmdline, "-object");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "memory-backend-memfd,id=mem,size=%s,share=on", mem);
if (r < 0)
return log_oom();
}
bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
if (arg_qemu_vsock < 0) {
r = qemu_check_vsock_support();
if (r < 0)
return log_error_errno(r, "Failed to check for VSock support: %m");
use_vsock = r;
}
if (!use_kvm && kvm_device_fd >= 0) {
log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
kvm_device_fd = safe_close(kvm_device_fd);
@ -776,15 +961,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return log_oom();
bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
if (arg_qemu_vsock < 0) {
r = qemu_check_vsock_support();
if (r < 0)
return log_error_errno(r, "Failed to check for VSock support: %m");
use_vsock = r;
}
_cleanup_close_ int child_vsock_fd = -EBADF;
if (use_vsock) {
int device_fd = vhost_device_fd;
@ -901,8 +1077,8 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
return log_oom();
}
if (arg_linux) {
r = strv_extend_many(&cmdline, "-kernel", arg_linux);
if (kernel) {
r = strv_extend_many(&cmdline, "-kernel", kernel);
if (r < 0)
return log_oom();
@ -915,31 +1091,93 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
}
}
r = strv_extend(&cmdline, "-drive");
if (r < 0)
return log_oom();
if (arg_image) {
assert(!arg_directory);
r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image);
if (r < 0)
return log_oom();
r = strv_extend(&cmdline, "-drive");
if (r < 0)
return log_oom();
r = strv_extend_many(
&cmdline,
r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image);
if (r < 0)
return log_oom();
r = strv_extend_many(&cmdline,
"-device", "virtio-scsi-pci,id=scsi",
"-device", "scsi-hd,drive=mkosi,bootindex=1");
if (r < 0)
return log_oom();
if (r < 0)
return log_oom();
}
if (arg_directory) {
_cleanup_free_ char *sock_path = NULL, *sock_name = NULL;
r = start_virtiofsd(bus, trans_scope, arg_directory, /* uidmap= */ true, &sock_path, &sock_name);
if (r < 0)
return r;
r = strv_extend(&cmdline, "-chardev");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, sock_path);
if (r < 0)
return log_oom();
r = strv_extend(&cmdline, "-device");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name);
if (r < 0)
return log_oom();
r = strv_extend(&arg_kernel_cmdline_extra, "root=root rootfstype=virtiofs rw");
if (r < 0)
return log_oom();
}
r = strv_prepend(&arg_kernel_cmdline_extra, "console=" DEFAULT_SERIAL_TTY);
if (r < 0)
return log_oom();
FOREACH_ARRAY(mount, arg_runtime_mounts.mounts, arg_runtime_mounts.n_mounts) {
_cleanup_free_ char *sock_path = NULL, *sock_name = NULL, *clean_target = NULL;
r = start_virtiofsd(bus, trans_scope, mount->source, /* uidmap= */ false, &sock_path, &sock_name);
if (r < 0)
return r;
r = strv_extend(&cmdline, "-chardev");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, sock_path);
if (r < 0)
return log_oom();
r = strv_extend(&cmdline, "-device");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%1$s,tag=%1$s", sock_name);
if (r < 0)
return log_oom();
clean_target = xescape(mount->target, "\":");
if (!clean_target)
return log_oom();
r = strv_extendf(&arg_kernel_cmdline_extra, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
sock_name, clean_target, mount->read_only ? "ro" : "rw");
if (r < 0)
return log_oom();
}
if (ARCHITECTURE_SUPPORTS_SMBIOS) {
_cleanup_free_ char *kcl = strv_join(arg_kernel_cmdline_extra, " ");
if (!kcl)
return log_oom();
if (arg_linux) {
if (kernel) {
r = strv_extend_many(&cmdline, "-append", kcl);
if (r < 0)
return log_oom();
@ -1036,6 +1274,8 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
log_debug("Executing: %s", joined);
}
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
_cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
r = sd_event_new(&event);
@ -1106,20 +1346,30 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
static int determine_names(void) {
int r;
if (!arg_image)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing required argument -i/--image=, quitting");
if (!arg_directory && !arg_image)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine path, please use -D or -i.");
if (!arg_machine) {
char *e;
if (arg_directory && path_equal(arg_directory, "/")) {
arg_machine = gethostname_malloc();
if (!arg_machine)
return log_oom();
} else if (arg_image) {
char *e;
r = path_extract_filename(arg_image, &arg_machine);
if (r < 0)
return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
r = path_extract_filename(arg_image, &arg_machine);
if (r < 0)
return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
/* Truncate suffix if there is one */
e = endswith(arg_machine, ".raw");
if (e)
*e = 0;
/* Truncate suffix if there is one */
e = endswith(arg_machine, ".raw");
if (e)
*e = 0;
} else {
r = path_extract_filename(arg_directory, &arg_machine);
if (r < 0)
return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_directory);
}
hostname_cleanup(arg_machine);
if (!hostname_is_valid(arg_machine, 0))
@ -1158,11 +1408,12 @@ static int run(int argc, char *argv[]) {
if (!arg_quiet) {
_cleanup_free_ char *u = NULL;
(void) terminal_urlify_path(arg_image, arg_image, &u);
const char *vm_path = arg_image ?: arg_directory;
(void) terminal_urlify_path(vm_path, vm_path, &u);
log_info("%s %sSpawning VM %s on %s.%s\n"
"%s %sPress %sCtrl-a x%s to kill VM.%s",
special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: arg_image, ansi_normal(),
special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: vm_path, ansi_normal(),
special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
}
@ -1182,8 +1433,6 @@ static int run(int argc, char *argv[]) {
}
}
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
return run_virtual_machine(kvm_device_fd, vhost_device_fd);
}