Merge pull request #29508 from CodethinkLabs/systemd-vmspawn-pr

systemd-vmspawn implementation that only supports disk images
This commit is contained in:
Luca Boccassi 2023-11-03 16:04:38 +00:00 committed by GitHub
commit 1af46aecf5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 1041 additions and 129 deletions

View file

@ -1118,6 +1118,7 @@ manpages = [
'8',
['systemd-veritysetup'],
'HAVE_LIBCRYPTSETUP'],
['systemd-vmspawn', '1', [], 'ENABLE_VMSPAWN'],
['systemd-volatile-root.service', '8', ['systemd-volatile-root'], ''],
['systemd-xdg-autostart-generator', '8', [], 'ENABLE_XDG_AUTOSTART'],
['systemd', '1', ['init'], ''],

161
man/systemd-vmspawn.xml Normal file
View file

@ -0,0 +1,161 @@
<?xml version='1.0'?>
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
<refentry id="systemd-vmspawn" conditional="ENABLE_VMSPAWN"
xmlns:xi="http://www.w3.org/2001/XInclude">
<refentryinfo>
<title>systemd-vmspawn</title>
<productname>systemd</productname>
</refentryinfo>
<refmeta>
<refentrytitle>systemd-vmspawn</refentrytitle>
<manvolnum>1</manvolnum>
</refmeta>
<refnamediv>
<refname>systemd-vmspawn</refname>
<refpurpose>Spawn an OS in a virtual machine.</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>systemd-vmspawn</command>
<arg choice="opt" rep="repeat">OPTIONS</arg>
<arg choice="opt" rep="repeat">ARGS</arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para><command>systemd-vmspawn</command> may be used to start a virtual machine from an OS image. In many ways it is similar to
<citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>, but it
launches a full virtual machine instead of using namespaces.</para>
</refsect1>
<refsect1>
<title>Options</title>
<para>The arguments are passed straight through to QEMU, extending its command line arguments.</para>
<para>The following options are understood:</para>
<refsect2>
<title>Image Options</title>
<variablelist>
<varlistentry>
<term><option>-i</option></term>
<term><option>--image=</option></term>
<listitem><para>Root file system disk image (or device node) for the virtual machine.</para></listitem>
</varlistentry>
</variablelist>
</refsect2>
<refsect2>
<title>Host Configuration</title>
<variablelist>
<varlistentry>
<term><option>--qemu-smp=</option><replaceable>SMP</replaceable></term>
<listitem><para>Configures the number of CPUs to start the virtual machine with.</para>
<para>Defaults to 1.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--qemu-mem=</option><replaceable>MEM</replaceable></term>
<listitem><para>Configures the amount of memory to start the virtual machine with.</para>
<para>Defaults to 2G.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--qemu-kvm=</option></term>
<listitem><para>Configure whether to use KVM.</para>
<para>If the option is not specified, KVM support will be detected automatically.
If yes is specified KVM is always used; conversely, if no is specified KVM is never used.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--qemu-gui</option></term>
<listitem><para>Start QEMU in graphical mode.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--secure-boot=</option></term>
<listitem><para>Configure whether to search for firmware which supports Secure Boot.</para>
<para>If the option is not specified the first firmware which is detected will be used.
If the option is set to yes then the first firmware with Secure Boot support will be selected.
If no is specified then the first firmware without Secure Boot will be selected.</para></listitem>
</varlistentry>
</variablelist>
</refsect2>
<refsect2>
<title>Credentials</title>
<variablelist>
<varlistentry>
<term><option>--load-credential=</option><replaceable>ID</replaceable>:<replaceable>PATH</replaceable></term>
<term><option>--set-credential=</option><replaceable>ID</replaceable>:<replaceable>VALUE</replaceable></term>
<listitem><para>Pass a credential to the virtual machine. These two options correspond to the
<varname>LoadCredential=</varname> and <varname>SetCredential=</varname> settings in unit files. See
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
details about these concepts, as well as the syntax of the option's arguments.</para>
<para>In order to embed binary data into the credential data for <option>--set-credential=</option>,
use C-style escaping (i.e. <literal>\n</literal> to embed a newline, or <literal>\x00</literal> to
embed a <constant>NUL</constant> byte). Note that the invoking shell might already apply unescaping
once, hence this might require double escaping!</para>
</listitem>
</varlistentry>
</variablelist>
</refsect2><refsect2>
<title>Other</title>
<variablelist>
<xi:include href="standard-options.xml" xpointer="no-pager" />
<xi:include href="standard-options.xml" xpointer="help" />
<xi:include href="standard-options.xml" xpointer="version" />
</variablelist>
</refsect2>
</refsect1>
<xi:include href="common-variables.xml" />
<refsect1>
<title>Examples</title>
<example>
<title>Run an Arch Linux VM image generated by mkosi</title>
<programlisting># mkosi -d arch -p systemd -p linux --autologin -o image.raw -f build
# systemd-vmspawn --image=image.raw</programlisting>
</example>
</refsect1>
<refsect1>
<title>Exit status</title>
<para>If an error occurred, the value of errno is propagated to the return code.
Otherwise EXIT_SUCCESS is returned.</para>
</refsect1>
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>mkosi</refentrytitle><manvolnum>1</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>

View file

@ -1609,6 +1609,7 @@ foreach term : ['analyze',
'userdb',
'utmp',
'vconsole',
'vmspawn',
'xdg-autostart']
have = get_option(term)
name = 'ENABLE_' + term.underscorify().to_upper()
@ -2213,6 +2214,7 @@ subdir('src/userdb')
subdir('src/varlinkctl')
subdir('src/vconsole')
subdir('src/veritysetup')
subdir('src/vmspawn')
subdir('src/volatile-root')
subdir('src/xdg-autostart-generator')
@ -2804,6 +2806,7 @@ foreach tuple : [
['tmpfiles'],
['userdb'],
['vconsole'],
['vmspawn'],
['xdg-autostart'],
# optional features

View file

@ -156,6 +156,8 @@ option('backlight', type : 'boolean',
description : 'support for restoring backlight state')
option('vconsole', type : 'boolean',
description : 'support for vconsole configuration')
option('vmspawn', type : 'boolean', value: false,
description : 'install the systemd-vmspawn tool')
option('quotacheck', type : 'boolean',
description : 'support for the quotacheck tools')
option('sysusers', type : 'boolean',

View file

@ -74,6 +74,10 @@ static inline const sd_char *yes_no(bool b) {
return b ? STR_C("yes") : STR_C("no");
}
/* Maps a boolean to the literal "on" (true) or "off" (false). */
static inline const sd_char *on_off(bool b) {
        if (b)
                return STR_C("on");

        return STR_C("off");
}
/* Renders a three-way comparison result as an operator string: "<" for a negative value, ">" for a
 * positive value and "==" for zero. */
static inline const sd_char* comparison_operator(int result) {
        if (result < 0)
                return STR_C("<");
        if (result > 0)
                return STR_C(">");

        return STR_C("==");
}

View file

@ -3,7 +3,6 @@
libnspawn_core_sources = files(
'nspawn-bind-user.c',
'nspawn-cgroup.c',
'nspawn-creds.c',
'nspawn-expose-ports.c',
'nspawn-mount.c',
'nspawn-network.c',

View file

@ -1,25 +0,0 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "alloc-util.h"
#include "macro.h"
#include "memory-util.h"
#include "nspawn-creds.h"
/* Releases everything a single Credential entry points to (the id via mfree(), the data via
 * erase_and_free()) and resets its fields. The entry itself is not freed. */
static void credential_free(Credential *cred) {
        assert(cred);

        cred->size = 0;
        cred->data = erase_and_free(cred->data);
        cred->id = mfree(cred->id);
}
/* Releases the contents of the first n entries of creds, then frees the array itself.
 * creds may be NULL only when n is 0. */
void credential_free_all(Credential *creds, size_t n) {
        assert(creds || n == 0);

        for (Credential *c = creds; n > 0; c++, n--)
                credential_free(c);

        free(creds);
}

View file

@ -1,12 +0,0 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include <sys/types.h>
/* A single credential: a name plus a binary payload of 'size' bytes. */
typedef struct Credential {
char *id; /* credential name, heap-allocated */
void *data; /* payload, heap-allocated */
size_t size; /* length of 'data' in bytes */
} Credential;
void credential_free_all(Credential *creds, size_t n);

View file

@ -60,6 +60,7 @@
#include "log.h"
#include "loop-util.h"
#include "loopback-setup.h"
#include "machine-credential.h"
#include "macro.h"
#include "main-func.h"
#include "missing_sched.h"
@ -70,7 +71,6 @@
#include "netlink-util.h"
#include "nspawn-bind-user.h"
#include "nspawn-cgroup.h"
#include "nspawn-creds.h"
#include "nspawn-def.h"
#include "nspawn-expose-ports.h"
#include "nspawn-mount.h"
@ -229,7 +229,7 @@ static DeviceNode* arg_extra_nodes = NULL;
static size_t arg_n_extra_nodes = 0;
static char **arg_sysctl = NULL;
static ConsoleMode arg_console_mode = _CONSOLE_MODE_INVALID;
static Credential *arg_credentials = NULL;
static MachineCredential *arg_credentials = NULL;
static size_t arg_n_credentials = 0;
static char **arg_bind_user = NULL;
static bool arg_suppress_sync = false;
@ -1567,106 +1567,24 @@ static int parse_argv(int argc, char *argv[]) {
arg_pager_flags |= PAGER_DISABLE;
break;
case ARG_SET_CREDENTIAL: {
_cleanup_free_ char *word = NULL, *data = NULL;
const char *p = optarg;
Credential *a;
ssize_t l;
r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
case ARG_SET_CREDENTIAL:
r = machine_credential_set(&arg_credentials, &arg_n_credentials, optarg);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
return log_error_errno(r, "Failed to parse --set-credential= parameter: %m");
if (r == 0 || !p)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing value for --set-credential=: %s", optarg);
if (!credential_name_valid(word))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential name is not valid: %s", word);
for (size_t i = 0; i < arg_n_credentials; i++)
if (streq(arg_credentials[i].id, word))
return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Duplicate credential '%s', refusing.", word);
l = cunescape(p, UNESCAPE_ACCEPT_NUL, &data);
if (l < 0)
return log_error_errno(l, "Failed to unescape credential data: %s", p);
a = reallocarray(arg_credentials, arg_n_credentials + 1, sizeof(Credential));
if (!a)
return log_oom();
a[arg_n_credentials++] = (Credential) {
.id = TAKE_PTR(word),
.data = TAKE_PTR(data),
.size = l,
};
arg_credentials = a;
return log_error_errno(r, "Failed to set credential from %s: %m", optarg);
arg_settings_mask |= SETTING_CREDENTIALS;
break;
}
case ARG_LOAD_CREDENTIAL: {
ReadFullFileFlags flags = READ_FULL_FILE_SECURE;
_cleanup_(erase_and_freep) char *data = NULL;
_cleanup_free_ char *word = NULL, *j = NULL;
const char *p = optarg;
Credential *a;
size_t size, i;
r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
case ARG_LOAD_CREDENTIAL:
r = machine_credential_load(&arg_credentials, &arg_n_credentials, optarg);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
return log_error_errno(r, "Failed to parse --load-credential= parameter: %m");
if (r == 0 || !p)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing value for --load-credential=: %s", optarg);
if (!credential_name_valid(word))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential name is not valid: %s", word);
for (i = 0; i < arg_n_credentials; i++)
if (streq(arg_credentials[i].id, word))
return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Duplicate credential '%s', refusing.", word);
if (path_is_absolute(p))
flags |= READ_FULL_FILE_CONNECT_SOCKET;
else {
const char *e;
r = get_credentials_dir(&e);
if (r < 0)
return log_error_errno(r, "Credential not available (no credentials passed at all): %s", word);
j = path_join(e, p);
if (!j)
return log_oom();
}
r = read_full_file_full(AT_FDCWD, j ?: p, UINT64_MAX, SIZE_MAX,
flags,
NULL,
&data, &size);
if (r < 0)
return log_error_errno(r, "Failed to read credential '%s': %m", j ?: p);
a = reallocarray(arg_credentials, arg_n_credentials + 1, sizeof(Credential));
if (!a)
return log_oom();
a[arg_n_credentials++] = (Credential) {
.id = TAKE_PTR(word),
.data = TAKE_PTR(data),
.size = size,
};
arg_credentials = a;
return log_error_errno(r, "Failed to load credential from %s: %m", optarg);
arg_settings_mask |= SETTING_CREDENTIALS;
break;
}
case ARG_BIND_USER:
if (!valid_user_group_name(optarg, 0))
@ -5933,7 +5851,7 @@ finish:
expose_port_free_all(arg_expose_ports);
rlimit_free_all(arg_rlimit);
device_node_array_free(arg_extra_nodes, arg_n_extra_nodes);
credential_free_all(arg_credentials, arg_n_credentials);
machine_credential_free_all(arg_credentials, arg_n_credentials);
if (r < 0)
return r;

View file

@ -0,0 +1,137 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "alloc-util.h"
#include "creds-util.h"
#include "escape.h"
#include "extract-word.h"
#include "fileio.h"
#include "macro.h"
#include "memory-util.h"
#include "machine-credential.h"
#include "path-util.h"
#include "string-util-fundamental.h"
/* Releases everything a single MachineCredential entry points to (the id via mfree(), the data via
 * erase_and_free()) and resets its fields. The entry itself is not freed. */
static void machine_credential_done(MachineCredential *cred) {
        assert(cred);

        cred->size = 0;
        cred->data = erase_and_free(cred->data);
        cred->id = mfree(cred->id);
}
/* Releases the contents of the first n entries of creds, then frees the array itself.
 * creds may be NULL only when n is 0. */
void machine_credential_free_all(MachineCredential *creds, size_t n) {
        assert(creds || n == 0);

        for (size_t i = 0; i < n; i++)
                machine_credential_done(creds + i);

        free(creds);
}
/* Parses an "ID:VALUE" string (the argument format of --set-credential=) and appends the resulting
 * credential to the array at *credentials, bumping *n_credentials.
 *
 * VALUE is passed through cunescape() with UNESCAPE_ACCEPT_NUL, and the number of bytes it returns is
 * stored as the credential size.
 *
 * Returns 0 on success and a negative errno-style value on failure, among them -EINVAL for a missing
 * value or an invalid name, -EEXIST for an ID that is already in the array, and -ENOMEM on allocation
 * failure. *credentials and *n_credentials are only written on success. */
int machine_credential_set(MachineCredential **credentials, size_t *n_credentials, const char *cred_string) {
        /* The unescaped value is secret material, hence erase it if we bail out before handing it over. */
        _cleanup_(erase_and_freep) char *data = NULL;
        _cleanup_free_ char *word = NULL;
        MachineCredential *creds = *ASSERT_PTR(credentials);
        size_t n_creds = *ASSERT_PTR(n_credentials);
        const char *p = ASSERT_PTR(cred_string);
        ssize_t l;
        int r;

        assert(creds || n_creds == 0);

        r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (r == -ENOMEM)
                return r;
        if (r < 0)
                return log_error_errno(r, "Failed to parse --set-credential= parameter: %m");
        if (r == 0 || !p)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing value for --set-credential=: %s", cred_string);

        if (!credential_name_valid(word))
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential name is not valid: %s", word);

        FOREACH_ARRAY(cred, creds, n_creds)
                if (streq(cred->id, word))
                        return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Duplicate credential '%s', refusing.", word);

        l = cunescape(p, UNESCAPE_ACCEPT_NUL, &data);
        if (l < 0)
                return log_error_errno(l, "Failed to unescape credential data: %s", p);

        /* GREEDY_REALLOC() signals failure through its return value and leaves 'creds' pointing at the old
         * array, so testing 'creds' afterwards would miss the error and we would write out of bounds. */
        if (!GREEDY_REALLOC(creds, n_creds + 1))
                return -ENOMEM;

        creds[n_creds++] = (MachineCredential) {
                .id = TAKE_PTR(word),
                .data = TAKE_PTR(data),
                .size = l,
        };

        *credentials = creds;
        *n_credentials = n_creds;

        return 0;
}
/* Parses an "ID:PATH" string (the argument format of --load-credential=), reads the credential data
 * from PATH and appends the resulting credential to the array at *credentials, bumping *n_credentials.
 *
 * An absolute PATH is read with READ_FULL_FILE_CONNECT_SOCKET set in addition to READ_FULL_FILE_SECURE.
 * A relative PATH is resolved against the directory returned by get_credentials_dir().
 *
 * Returns 0 on success and a negative errno-style value on failure, among them -EINVAL for a missing
 * value or an invalid name, -EEXIST for an ID that is already in the array, and -ENOMEM on allocation
 * failure. *credentials and *n_credentials are only written on success. */
int machine_credential_load(MachineCredential **credentials, size_t *n_credentials, const char *cred_path) {
        ReadFullFileFlags flags = READ_FULL_FILE_SECURE;
        _cleanup_(erase_and_freep) char *data = NULL;
        _cleanup_free_ char *word = NULL, *j = NULL;
        MachineCredential *creds = *ASSERT_PTR(credentials);
        size_t size, n_creds = *ASSERT_PTR(n_credentials);
        const char *p = ASSERT_PTR(cred_path);
        int r;

        assert(creds || n_creds == 0);

        r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (r == -ENOMEM)
                return -ENOMEM;
        if (r < 0)
                return log_error_errno(r, "Failed to parse --load-credential= parameter: %m");
        if (r == 0 || !p)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing value for --load-credential=: %s", cred_path);

        if (!credential_name_valid(word))
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential name is not valid: %s", word);

        FOREACH_ARRAY(cred, creds, n_creds)
                if (streq(cred->id, word))
                        return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Duplicate credential '%s', refusing.", word);

        if (path_is_absolute(p))
                flags |= READ_FULL_FILE_CONNECT_SOCKET;
        else {
                const char *e;

                r = get_credentials_dir(&e);
                if (r < 0)
                        return log_error_errno(r, "Credential not available (no credentials passed at all): %s", word);

                j = path_join(e, p);
                if (!j)
                        return -ENOMEM;
        }

        r = read_full_file_full(AT_FDCWD, j ?: p, UINT64_MAX, SIZE_MAX,
                                flags,
                                NULL,
                                &data, &size);
        if (r < 0)
                return log_error_errno(r, "Failed to read credential '%s': %m", j ?: p);

        /* GREEDY_REALLOC() signals failure through its return value and leaves 'creds' pointing at the old
         * array, so testing 'creds' afterwards would miss the error and we would write out of bounds. */
        if (!GREEDY_REALLOC(creds, n_creds + 1))
                return -ENOMEM;

        creds[n_creds++] = (MachineCredential) {
                .id = TAKE_PTR(word),
                .data = TAKE_PTR(data),
                .size = size,
        };

        *credentials = creds;
        *n_credentials = n_creds;

        return 0;
}

View file

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include <sys/types.h>
/* A single credential to pass to a machine: a name plus a binary payload of 'size' bytes. */
typedef struct MachineCredential {
char *id; /* credential name, heap-allocated */
void *data; /* payload, heap-allocated */
size_t size; /* length of 'data' in bytes */
} MachineCredential;
void machine_credential_free_all(MachineCredential *creds, size_t n);
int machine_credential_set(MachineCredential **credentials, size_t *n_credentials, const char *cred_string);
int machine_credential_load(MachineCredential **credentials, size_t *n_credentials, const char *cred_path);

View file

@ -111,6 +111,7 @@ shared_sources = files(
'loop-util.c',
'loopback-setup.c',
'lsm-util.c',
'machine-credential.c',
'machine-id-setup.c',
'machine-pool.c',
'macvlan-util.c',

27
src/vmspawn/meson.build Normal file
View file

@ -0,0 +1,27 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
# Helper sources shared by the systemd-vmspawn executable.
libvmspawn_core_sources = files(
'vmspawn-settings.c',
'vmspawn-util.c',
)
# Static helper library; not built by default, only when a target that links it is built.
libvmspawn_core = static_library(
'vmspawn-core',
libvmspawn_core_sources,
include_directories : includes,
dependencies : [userspace],
build_by_default : false)
vmspawn_libs = [
libvmspawn_core,
libshared,
]
# The systemd-vmspawn executable itself, gated on the ENABLE_VMSPAWN condition.
executables += [
executable_template + {
'name' : 'systemd-vmspawn',
'public' : true,
'conditions': ['ENABLE_VMSPAWN'],
'sources' : files('vmspawn.c'),
'link_with' : vmspawn_libs,
}
]

View file

@ -0,0 +1,3 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "vmspawn-settings.h"

View file

@ -0,0 +1,11 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include <stdint.h>
/* Bit flags recording which settings were given explicitly. Each constant is a distinct single bit. */
typedef enum SettingsMask {
SETTING_START_MODE = UINT64_C(1) << 0,
SETTING_DIRECTORY = UINT64_C(1) << 26,
SETTING_CREDENTIALS = UINT64_C(1) << 30,
/* Not a real flag; presumably present to make the enum 64 bits wide, judging by its name. */
_SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
} SettingsMask;

238
src/vmspawn/vmspawn-util.c Normal file
View file

@ -0,0 +1,238 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <stdio.h>
#include "alloc-util.h"
#include "architecture.h"
#include "conf-files.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "json.h"
#include "log.h"
#include "macro.h"
#include "memory-util.h"
#include "path-lookup.h"
#include "path-util.h"
#include "recurse-dir.h"
#include "sort-util.h"
#include "string-util.h"
#include "strv.h"
#include "vmspawn-util.h"
/* Frees an OvmfConfig together with the strings it owns. Accepts NULL. Returns whatever mfree()
 * yields for the object, so the result can be assigned back to the pointer being released. */
OvmfConfig* ovmf_config_free(OvmfConfig *config) {
        if (config) {
                free(config->vars);
                free(config->path);
        }

        return mfree(config);
}
/* Checks whether /dev/kvm exists.
 *
 * Returns true if the node is there, false if it is missing or cannot be looked up for lack of
 * permission (both cases are logged at debug level), and a negative errno for any other failure. */
int qemu_check_kvm_support(void) {
        if (access("/dev/kvm", F_OK) >= 0)
                return true;

        if (errno == ENOENT) {
                log_debug_errno(errno, "/dev/kvm not found. Not using KVM acceleration.");
                return false;
        }

        /* access() reports missing permission as EACCES; EPERM is kept for the benefit of callers
         * running under restrictions that surface it instead. */
        if (errno == EPERM || errno == EACCES) {
                log_debug_errno(errno, "Permission denied to access /dev/kvm. Not using KVM acceleration.");
                return false;
        }

        return -errno;
}
/* holds the data retrieved from the QEMU firmware interop JSON data */
typedef struct FirmwareData {
char **features;
char *firmware;
char *vars;
} FirmwareData;
/* Frees a FirmwareData together with the string list and strings it owns. Accepts NULL. */
static FirmwareData* firmware_data_free(FirmwareData *fwd) {
        if (fwd) {
                strv_free(fwd->features);
                free(fwd->firmware);
                free(fwd->vars);
        }

        return mfree(fwd);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(FirmwareData*, firmware_data_free);
/* Dispatch callback for the "executable" object: stores its "filename" string in
 * FirmwareData.firmware. "format" must be present as a string but has no handler attached.
 * Note that the 'name' and 'flags' arguments are not used; the nested dispatch runs with flags 0. */
static int firmware_executable(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
        static const JsonDispatch fields[] = {
                { "filename", JSON_VARIANT_STRING, json_dispatch_string, offsetof(FirmwareData, firmware), JSON_MANDATORY },
                { "format",   JSON_VARIANT_STRING, NULL,                 0,                                JSON_MANDATORY },
                {}
        };

        return json_dispatch(v, fields, 0, userdata);
}
/* Dispatch callback for the "nvram-template" object: stores its "filename" string in
 * FirmwareData.vars. "format" must be present as a string but has no handler attached.
 * Note that the 'name' and 'flags' arguments are not used; the nested dispatch runs with flags 0. */
static int firmware_nvram_template(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
        static const JsonDispatch fields[] = {
                { "filename", JSON_VARIANT_STRING, json_dispatch_string, offsetof(FirmwareData, vars), JSON_MANDATORY },
                { "format",   JSON_VARIANT_STRING, NULL,                 0,                            JSON_MANDATORY },
                {}
        };

        return json_dispatch(v, fields, 0, userdata);
}
/* Dispatch callback for the "mapping" object: hands "executable" and "nvram-template" to their
 * respective callbacks, passing the same userdata through. "device" must be present as a string but
 * has no handler attached.
 * Note that the 'name' and 'flags' arguments are not used; the nested dispatch runs with flags 0. */
static int firmware_mapping(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
        static const JsonDispatch fields[] = {
                { "device",         JSON_VARIANT_STRING, NULL,                    0, JSON_MANDATORY },
                { "executable",     JSON_VARIANT_OBJECT, firmware_executable,     0, JSON_MANDATORY },
                { "nvram-template", JSON_VARIANT_OBJECT, firmware_nvram_template, 0, JSON_MANDATORY },
                {}
        };

        return json_dispatch(v, fields, 0, userdata);
}
/* Picks a firmware configuration from the QEMU firmware descriptor files (*.json).
 *
 * search_sb: negative accepts any firmware; otherwise only firmware whose "secure-boot" feature
 * presence equals search_sb is accepted.
 *
 * On success returns 0 and, if ret is non-NULL, stores a newly allocated OvmfConfig in *ret.
 * Returns -ENOENT if no descriptor qualified, -ENOMEM on allocation failure, or the error from
 * looking up the user config directory or listing the descriptor files. */
int find_ovmf_config(int search_sb, OvmfConfig **ret) {
_cleanup_(ovmf_config_freep) OvmfConfig *config = NULL;
_cleanup_free_ char *user_firmware_dir = NULL;
_cleanup_strv_free_ char **conf_files = NULL;
int r;
/* Search in:
* - $XDG_CONFIG_HOME/qemu/firmware
* - /etc/qemu/firmware
* - /usr/share/qemu/firmware
*
* Prioritising entries in "more specific" directories
*/
r = xdg_user_config_dir(&user_firmware_dir, "/qemu/firmware");
if (r < 0)
return r;
r = conf_files_list_strv(&conf_files, ".json", NULL, CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR,
STRV_MAKE_CONST(user_firmware_dir, "/etc/qemu/firmware", "/usr/share/qemu/firmware"));
if (r < 0)
return log_debug_errno(r, "Failed to list config files: %m");
/* Walk the descriptors in the order returned and stop at the first one that qualifies. */
STRV_FOREACH(file, conf_files) {
_cleanup_(firmware_data_freep) FirmwareData *fwd = NULL;
_cleanup_(json_variant_unrefp) JsonVariant *config_json = NULL;
_cleanup_free_ char *contents = NULL;
size_t contents_sz = 0;
/* Out-of-memory aborts the whole search; any other per-file failure just skips that file. */
r = read_full_file(*file, &contents, &contents_sz);
if (r == -ENOMEM)
return r;
if (r < 0) {
log_debug_errno(r, "Failed to read contents of %s - ignoring: %m", *file);
continue;
}
r = json_parse(contents, 0, &config_json, NULL, NULL);
if (r == -ENOMEM)
return r;
if (r < 0) {
log_debug_errno(r, "Failed to parse the JSON in %s - ignoring: %m", *file);
continue;
}
/* Only "mapping" and "features" have handlers; the other entries are required to be present
* with the given type but carry no handler. */
static const JsonDispatch table[] = {
{ "description", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
{ "interface-types", JSON_VARIANT_ARRAY, NULL, 0, JSON_MANDATORY },
{ "mapping", JSON_VARIANT_OBJECT, firmware_mapping, 0, JSON_MANDATORY },
{ "targets", JSON_VARIANT_ARRAY, NULL, 0, JSON_MANDATORY },
{ "features", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(FirmwareData, features), JSON_MANDATORY },
{ "tags", JSON_VARIANT_ARRAY, NULL, 0, JSON_MANDATORY },
{}
};
fwd = new0(FirmwareData, 1);
if (!fwd)
return -ENOMEM;
r = json_dispatch(config_json, table, 0, fwd);
if (r == -ENOMEM)
return r;
if (r < 0) {
log_debug_errno(r, "Failed to extract the required fields from the JSON in %s - ignoring: %m", *file);
continue;
}
int sb_present = !!strv_find(fwd->features, "secure-boot");
/* exclude firmware which doesn't match our Secure Boot requirements */
if (search_sb >= 0 && search_sb != sb_present) {
log_debug("Skipping %s, firmware doesn't fit required Secure Boot configuration", *file);
continue;
}
config = new0(OvmfConfig, 1);
if (!config)
return -ENOMEM;
/* Move the two path strings out of fwd so that its cleanup does not free them. */
config->path = TAKE_PTR(fwd->firmware);
config->vars = TAKE_PTR(fwd->vars);
config->supports_sb = sb_present;
break;
}
if (!config)
return -ENOENT;
if (ret)
*ret = TAKE_PTR(config);
return 0;
}
int find_qemu_binary(char **ret_qemu_binary) {
int r;
/*
* On success the path to the qemu binary will be stored in `req_qemu_binary`
*
* If the qemu binary cannot be found -ENOENT will be returned.
* If the native architecture is not supported by qemu -EOPNOTSUPP will be returned;
*/
static const char *architecture_to_qemu_table[_ARCHITECTURE_MAX] = {
[ARCHITECTURE_ARM64] = "aarch64", /* differs from our name */
[ARCHITECTURE_ARM] = "arm",
[ARCHITECTURE_ALPHA] = "alpha",
[ARCHITECTURE_X86_64] = "x86_64", /* differs from our name */
[ARCHITECTURE_X86] = "i386", /* differs from our name */
[ARCHITECTURE_LOONGARCH64] = "loongarch64",
[ARCHITECTURE_MIPS64_LE] = "mips", /* differs from our name */
[ARCHITECTURE_MIPS_LE] = "mips", /* differs from our name */
[ARCHITECTURE_PARISC] = "hppa", /* differs from our name */
[ARCHITECTURE_PPC64_LE] = "ppc", /* differs from our name */
[ARCHITECTURE_PPC64] = "ppc", /* differs from our name */
[ARCHITECTURE_PPC] = "ppc",
[ARCHITECTURE_RISCV32] = "riscv32",
[ARCHITECTURE_RISCV64] = "riscv64",
[ARCHITECTURE_S390X] = "s390x",
};
FOREACH_STRING(s, "qemu", "qemu-kvm") {
r = find_executable(s, ret_qemu_binary);
if (r == 0)
return 0;
if (r != -ENOENT)
return r;
}
const char *arch_qemu = architecture_to_qemu_table[native_architecture()];
if (!arch_qemu)
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Architecture %s not supported by qemu", architecture_to_string(native_architecture()));
_cleanup_free_ char *qemu_arch_specific = NULL;
qemu_arch_specific = strjoin("qemu-system-", arch_qemu);
if (!qemu_arch_specific)
return -ENOMEM;
return find_executable(qemu_arch_specific, ret_qemu_binary);
}

View file

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include <stdbool.h>
#include "macro.h"
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
#define ARCHITECTURE_SUPPORTS_SMBIOS 1
#else
#define ARCHITECTURE_SUPPORTS_SMBIOS 0
#endif
/* The firmware selected for a virtual machine. */
typedef struct OvmfConfig {
char *path; /* path of the firmware image, heap-allocated */
char *vars; /* path of the NVRAM variables template, heap-allocated */
bool supports_sb; /* whether the firmware descriptor lists the "secure-boot" feature */
} OvmfConfig;
OvmfConfig* ovmf_config_free(OvmfConfig *ovmf_config);
DEFINE_TRIVIAL_CLEANUP_FUNC(OvmfConfig*, ovmf_config_free);
int qemu_check_kvm_support(void);
int find_ovmf_config(int search_sb, OvmfConfig **ret_ovmf_config);
int find_qemu_binary(char **ret_qemu_binary);

406
src/vmspawn/vmspawn.c Normal file
View file

@ -0,0 +1,406 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <getopt.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
#include "alloc-util.h"
#include "architecture.h"
#include "build.h"
#include "copy.h"
#include "creds-util.h"
#include "escape.h"
#include "fileio.h"
#include "format-util.h"
#include "hexdecoct.h"
#include "log.h"
#include "machine-credential.h"
#include "main-func.h"
#include "pager.h"
#include "parse-argument.h"
#include "parse-util.h"
#include "path-util.h"
#include "pretty-print.h"
#include "process-util.h"
#include "strv.h"
#include "tmpfile-util.h"
#include "vmspawn-settings.h"
#include "vmspawn-util.h"
/* Settings filled in by parse_argv(). */
static PagerFlags arg_pager_flags = 0;
static char *arg_image = NULL; /* --image= */
static char *arg_qemu_smp = NULL; /* --qemu-smp=; "1" is used when unset */
static uint64_t arg_qemu_mem = 2ULL * 1024ULL * 1024ULL * 1024ULL; /* --qemu-mem=, in bytes */
static int arg_qemu_kvm = -1; /* --qemu-kvm=, tristate; negative means probe for KVM */
static bool arg_qemu_gui = false; /* --qemu-gui */
static int arg_secure_boot = -1; /* --secure-boot=, tristate; negative means any firmware */
static MachineCredential *arg_credentials = NULL; /* freed explicitly in run() */
static size_t arg_n_credentials = 0;
static SettingsMask arg_settings_mask = 0;
static char **arg_parameters = NULL; /* remaining command line arguments, appended to the QEMU command line */
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep);
STATIC_DESTRUCTOR_REGISTER(arg_parameters, strv_freep);
/* Prints the usage summary to stdout after calling pager_open() with the configured pager flags.
 * Returns 0 on success, or the result of log_oom() if the man page link cannot be generated. */
static int help(void) {
_cleanup_free_ char *link = NULL;
int r;
pager_open(arg_pager_flags);
r = terminal_urlify_man("systemd-vmspawn", "1", &link);
/* Every failure here is reported as out-of-memory, whatever the actual error code. */
if (r < 0)
return log_oom();
/* The format string uses positional arguments: %1$s is the program name, %2$s the man page link,
* %3$s/%4$s wrap section titles in underline on/off, %5$s/%6$s wrap the summary in highlight on/off.
* NOTE(review): the credential descriptions say "container" although this tool starts a virtual
* machine - confirm the intended wording. */
printf("%1$s [OPTIONS...] [ARGUMENTS...]\n\n"
"%5$sSpawn a command or OS in a virtual machine.%6$s\n\n"
" -h --help Show this help\n"
" --version Print version string\n"
" --no-pager Do not pipe output into a pager\n\n"
"%3$sImage:%4$s\n"
" -i --image=PATH Root file system disk image (or device node) for\n"
" the virtual machine\n\n"
"%3$sHost Configuration:%4$s\n"
" --qemu-smp=SMP Configure guest's SMP settings\n"
" --qemu-mem=MEM Configure guest's RAM size\n"
" --qemu-kvm= Configure whether to use KVM or not\n"
" --qemu-gui Start QEMU in graphical mode\n"
" --secure-boot= Configure whether to search for firmware which supports Secure Boot\n\n"
"%3$sCredentials:%4$s\n"
" --set-credential=ID:VALUE\n"
" Pass a credential with literal value to container.\n"
" --load-credential=ID:PATH\n"
" Load credential to pass to container from file or\n"
" AF_UNIX stream socket.\n"
"\nSee the %2$s for details.\n",
program_invocation_short_name,
link,
ansi_underline(),
ansi_normal(),
ansi_highlight(),
ansi_normal());
return 0;
}
/* Parses the command line into the arg_* globals.
 *
 * Option parsing stops at the first non-option argument (the leading '+' in the short option string);
 * everything from there on is copied into arg_parameters.
 *
 * Returns 1 if the caller should go on and start the virtual machine, the result of help()/version()
 * if one of those was requested, and a negative errno-style value on error. */
static int parse_argv(int argc, char *argv[]) {
        enum {
                ARG_VERSION = 0x100,
                ARG_NO_PAGER,
                ARG_QEMU_SMP,
                ARG_QEMU_MEM,
                ARG_QEMU_KVM,
                ARG_QEMU_GUI,
                ARG_SECURE_BOOT,
                ARG_SET_CREDENTIAL,
                ARG_LOAD_CREDENTIAL,
        };

        static const struct option options[] = {
                { "help",            no_argument,       NULL, 'h'                 },
                { "version",         no_argument,       NULL, ARG_VERSION         },
                { "no-pager",        no_argument,       NULL, ARG_NO_PAGER        },
                { "image",           required_argument, NULL, 'i'                 },
                { "qemu-smp",        required_argument, NULL, ARG_QEMU_SMP        },
                { "qemu-mem",        required_argument, NULL, ARG_QEMU_MEM        },
                { "qemu-kvm",        required_argument, NULL, ARG_QEMU_KVM        },
                { "qemu-gui",        no_argument,       NULL, ARG_QEMU_GUI        },
                { "secure-boot",     required_argument, NULL, ARG_SECURE_BOOT     },
                { "set-credential",  required_argument, NULL, ARG_SET_CREDENTIAL  },
                { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
                {}
        };

        int c, r;

        assert(argc >= 0);
        assert(argv);

        optind = 0;
        while ((c = getopt_long(argc, argv, "+hi:", options, NULL)) >= 0)
                switch (c) {

                case 'h':
                        return help();

                case ARG_VERSION:
                        return version();

                case 'i':
                        r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
                        if (r < 0)
                                return r;

                        arg_settings_mask |= SETTING_DIRECTORY;
                        break;

                case ARG_NO_PAGER:
                        arg_pager_flags |= PAGER_DISABLE;
                        break;

                case ARG_QEMU_SMP: {
                        /* Duplicate first and only then drop the old value, so that a repeated
                         * --qemu-smp= neither leaks the earlier string nor loses it on allocation
                         * failure. */
                        char *smp = strdup(optarg);
                        if (!smp)
                                return log_oom();

                        free(arg_qemu_smp);
                        arg_qemu_smp = smp;
                        break;
                }

                case ARG_QEMU_MEM:
                        r = parse_size(optarg, 1024, &arg_qemu_mem);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse --qemu-mem=%s: %m", optarg);
                        break;

                case ARG_QEMU_KVM:
                        r = parse_tristate(optarg, &arg_qemu_kvm);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse --qemu-kvm=%s: %m", optarg);
                        break;

                case ARG_QEMU_GUI:
                        arg_qemu_gui = true;
                        break;

                case ARG_SECURE_BOOT:
                        r = parse_tristate(optarg, &arg_secure_boot);
                        if (r < 0)
                                return log_error_errno(r, "Failed to parse --secure-boot=%s: %m", optarg);
                        break;

                case ARG_SET_CREDENTIAL:
                        r = machine_credential_set(&arg_credentials, &arg_n_credentials, optarg);
                        if (r == -ENOMEM)
                                return log_oom();
                        if (r < 0)
                                return log_error_errno(r, "Failed to set credential from %s: %m", optarg);

                        arg_settings_mask |= SETTING_CREDENTIALS;
                        break;

                case ARG_LOAD_CREDENTIAL:
                        r = machine_credential_load(&arg_credentials, &arg_n_credentials, optarg);
                        if (r == -ENOMEM)
                                return log_oom();
                        if (r < 0)
                                return log_error_errno(r, "Failed to load credential from %s: %m", optarg);

                        arg_settings_mask |= SETTING_CREDENTIALS;
                        break;

                case '?':
                        return -EINVAL;

                default:
                        assert_not_reached();
                }

        /* Whatever follows the options is kept for the QEMU command line. */
        if (argc > optind) {
                strv_free(arg_parameters);
                arg_parameters = strv_copy(argv + optind);
                if (!arg_parameters)
                        return log_oom();

                arg_settings_mask |= SETTING_START_MODE;
        }

        return 1;
}
static int run_virtual_machine(void) {
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
_cleanup_strv_free_ char **cmdline = NULL;
_cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL;
int r;
bool use_kvm = arg_qemu_kvm > 0;
if (arg_qemu_kvm < 0) {
r = qemu_check_kvm_support();
if (r < 0)
return log_error_errno(r, "Failed to check for KVM support: %m");
use_kvm = r;
}
r = find_ovmf_config(arg_secure_boot, &ovmf_config);
if (r < 0)
return log_error_errno(r, "Failed to find OVMF config: %m");
/* only warn if the user hasn't disabled secureboot */
if (!ovmf_config->supports_sb && arg_secure_boot)
log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
"falling back to OVMF firmware blobs without Secure Boot support.");
const char *accel = use_kvm ? "kvm" : "tcg";
#ifdef __aarch64__
machine = strjoin("type=virt,accel=", accel);
#else
machine = strjoin("type=q35,accel=", accel, ",smm=", on_off(ovmf_config->supports_sb));
#endif
if (!machine)
return log_oom();
r = find_qemu_binary(&qemu_binary);
if (r == -EOPNOTSUPP)
return log_error_errno(r, "Native architecture is not supported by qemu.");
if (r < 0)
return log_error_errno(r, "Failed to find QEMU binary: %m");
if (asprintf(&mem, "%.4fM", (double)arg_qemu_mem / (1024.0 * 1024.0)) < 0)
return log_oom();
cmdline = strv_new(
qemu_binary,
"-machine", machine,
"-smp", arg_qemu_smp ?: "1",
"-m", mem,
"-object", "rng-random,filename=/dev/urandom,id=rng0",
"-device", "virtio-rng-pci,rng=rng0,id=rng-device0",
"-nic", "user,model=virtio-net-pci",
"-cpu", "max"
);
if (arg_qemu_gui) {
r = strv_extend_strv(&cmdline, STRV_MAKE("-vga", "virtio"), /* filter_duplicates= */ false);
if (r < 0)
return log_oom();
} else {
r = strv_extend_strv(&cmdline, STRV_MAKE(
"-nographic",
"-nodefaults",
"-chardev", "stdio,mux=on,id=console,signal=off",
"-serial", "chardev:console",
"-mon", "console"
), false);
if (r < 0)
return log_oom();
}
#if ARCHITECTURE_SUPPORTS_SMBIOS
ssize_t n;
FOREACH_ARRAY(cred, arg_credentials, arg_n_credentials) {
_cleanup_free_ char *cred_data_b64 = NULL;
n = base64mem(cred->data, cred->size, &cred_data_b64);
if (n < 0)
return log_oom();
r = strv_extend(&cmdline, "-smbios");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "type=11,value=io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64);
if (r < 0)
return log_oom();
}
#endif
r = strv_extend(&cmdline, "-drive");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "if=pflash,format=raw,readonly=on,file=%s", ovmf_config->path);
if (r < 0)
return log_oom();
if (ovmf_config->supports_sb) {
const char *ovmf_vars_from = ovmf_config->vars;
_cleanup_free_ char *ovmf_vars_to = NULL;
_cleanup_close_ int source_fd = -EBADF, target_fd = -EBADF;
r = tempfn_random_child(NULL, "vmspawn-", &ovmf_vars_to);
if (r < 0)
return r;
source_fd = open(ovmf_vars_from, O_RDONLY|O_CLOEXEC);
if (source_fd < 0)
return log_error_errno(source_fd, "Failed to open OVMF vars file %s: %m", ovmf_vars_from);
target_fd = open(ovmf_vars_to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600);
if (target_fd < 0)
return log_error_errno(errno, "Failed to create regular file for OVMF vars at %s: %m", ovmf_vars_to);
r = copy_bytes(source_fd, target_fd, UINT64_MAX, COPY_REFLINK);
if (r < 0)
return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", ovmf_vars_from, ovmf_vars_to);
/* These aren't always available so don't raise an error if they fail */
(void) copy_xattr(source_fd, NULL, target_fd, NULL, 0);
(void) copy_access(source_fd, target_fd);
(void) copy_times(source_fd, target_fd, 0);
r = strv_extend_strv(&cmdline, STRV_MAKE(
"-global", "ICH9-LPC.disable_s3=1",
"-global", "driver=cfi.pflash01,property=secure,value=on",
"-drive"
), false);
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "file=%s,if=pflash,format=raw", ovmf_vars_to);
if (r < 0)
return log_oom();
}
r = strv_extend(&cmdline, "-drive");
if (r < 0)
return log_oom();
r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image);
if (r < 0)
return log_oom();
r = strv_extend_strv(&cmdline, STRV_MAKE(
"-device", "virtio-scsi-pci,id=scsi",
"-device", "scsi-hd,drive=mkosi,bootindex=1"
), false);
if (r < 0)
return log_oom();
r = strv_extend_strv(&cmdline, arg_parameters, false);
if (r < 0)
return log_oom();
pid_t child_pid;
r = safe_fork(qemu_binary, 0, &child_pid);
if (r == 0) {
/* set TERM and LANG if they are missing */
if (setenv("TERM", "vt220", 0) < 0)
return log_oom();
if (setenv("LANG", "C.UTF-8", 0) < 0)
return log_oom();
execve(qemu_binary, cmdline, environ);
log_error_errno(errno, "Failed to execve %s: %m", qemu_binary);
_exit(EXIT_FAILURE);
}
return wait_for_terminate_and_check(qemu_binary, child_pid, WAIT_LOG);
}
/* Program entry point proper: parses the command line, checks that an image was given and runs the
 * virtual machine. The credential array is freed on every path.
 *
 * Returns a negative errno-style value on failure and EXIT_SUCCESS otherwise. */
static int run(int argc, char *argv[]) {
        int r, ret = EXIT_SUCCESS;

        log_setup();

        r = parse_argv(argc, argv);
        if (r <= 0)
                goto finish;

        if (!arg_image) {
                /* Record the failure in r; it still holds the positive parse_argv() result here, which
                 * would otherwise turn this error into a successful exit. */
                r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing required argument -i/--image, quitting");
                goto finish;
        }

        r = run_virtual_machine();

finish:
        machine_credential_free_all(arg_credentials, arg_n_credentials);

        if (r < 0)
                return r;

        return ret;
}
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);