From cea9216a42bddee3621e2e6c3a56a1ec9ce90dd0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 19 Apr 2024 13:25:55 +0200 Subject: [PATCH 1/7] detect-virt: detect hyperv-enlightened qemu as qemu, not as hyperv CPUID reporting hyperv should be taken with a grain of salt, and we should prefer other mechanisms then. Fixes: #28001 --- src/basic/virt.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/basic/virt.c b/src/basic/virt.c index 7e2c0781fe1..09703506f93 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -447,7 +447,7 @@ static Virtualization detect_vm_zvm(void) { /* Returns a short identifier for the various VM implementations */ Virtualization detect_vm(void) { static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID; - bool other = false; + bool other = false, hyperv = false; int xen_dom0 = 0; Virtualization v, dmi; @@ -504,7 +504,12 @@ Virtualization detect_vm(void) { v = detect_vm_cpuid(); if (v < 0) return v; - if (v == VIRTUALIZATION_VM_OTHER) + if (v == VIRTUALIZATION_MICROSOFT) + /* QEMU sets the CPUID string to hyperv's, in case it provides hyperv enlightenments. Let's + * hence not return Microsoft here but just use the other mechanisms first to make a better + * decision. */ + hyperv = true; + else if (v == VIRTUALIZATION_VM_OTHER) other = true; else if (v != VIRTUALIZATION_NONE) goto finish; @@ -545,8 +550,15 @@ Virtualization detect_vm(void) { return v; finish: - if (v == VIRTUALIZATION_NONE && other) - v = VIRTUALIZATION_VM_OTHER; + /* None of the checks above gave us a clear answer, hence let's now use fallback logic: if hyperv + * enlightenments are available but the VMM wasn't recognized as anything yet, it's probably + * Microsoft. 
*/ + if (v == VIRTUALIZATION_NONE) { + if (hyperv) + v = VIRTUALIZATION_MICROSOFT; + else if (other) + v = VIRTUALIZATION_VM_OTHER; + } cached_found = v; log_debug("Found VM virtualization %s", virtualization_to_string(v)); From 77290bc83fe9fc3eeba1354e7635a2aa0a1caa2d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 19 Apr 2024 13:38:38 +0200 Subject: [PATCH 2/7] vmspawn: enable hyperv enlightenments These are generally recommended (and libvirt/gnome-boxes default to them hence). They are mostly relevant for Windows, but I think it makes sense to enable them anyway for Linux too, simply to exercise #28001, and they shouldn't hurt. --- src/vmspawn/vmspawn.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index d3d37fd5d32..9687119df4d 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -1421,7 +1421,13 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { pass_fds[n_pass_fds++] = device_fd; } - r = strv_extend_many(&cmdline, "-cpu", "max"); + r = strv_extend_many(&cmdline, "-cpu", +#ifdef __x86_64__ + "max,hv_relaxed,hv-vapic,hv-time" +#else + "max" +#endif + ); if (r < 0) return log_oom(); From 6f9a1adf6d1341a7db98c5278e13e035c7a669d5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 19 Apr 2024 14:41:43 +0200 Subject: [PATCH 3/7] vmspawn: add env var that can extend the qemu cmdline This is a bit hackish, but really useful sometimes to play around with some qemu switches. --- docs/ENVIRONMENT.md | 3 +++ src/vmspawn/vmspawn.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md index 961601c72e0..8068d0d33cf 100644 --- a/docs/ENVIRONMENT.md +++ b/docs/ENVIRONMENT.md @@ -191,6 +191,9 @@ All tools: expected format is six groups of two hexadecimal digits separated by colons, e.g.
`SYSTEMD_VMSPAWN_NETWORK_MAC=12:34:56:78:90:AB` +* `$SYSTEMD_VMSPAWN_QEMU_EXTRA=…` – may contain additional command line + arguments to append to the qemu command line. + `systemd-logind`: * `$SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK=1` — if set, report that diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 9687119df4d..2ebb158640e 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -1881,6 +1881,18 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_error_errno(r, "Failed to call getsockname on VSOCK: %m"); } + const char *e = secure_getenv("SYSTEMD_VMSPAWN_QEMU_EXTRA"); + if (e) { + _cleanup_strv_free_ char **extra = NULL; + + r = strv_split_full(&extra, e, /* separator= */ NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE); + if (r < 0) + return log_error_errno(r, "Failed to split $SYSTEMD_VMSPAWN_QEMU_EXTRA environment variable: %m"); + + if (strv_extend_strv(&cmdline, extra, /* filter_duplicates= */ false) < 0) + return log_oom(); + } + if (DEBUG_LOGGING) { _cleanup_free_ char *joined = quote_command_line(cmdline, SHELL_ESCAPE_EMPTY); if (!joined) From 615906cdcfa8b950f583970a41bc3b3156f90f08 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 19 Apr 2024 14:47:28 +0200 Subject: [PATCH 4/7] sd-id128: add an app-specific flavour of the invocation ID too --- man/sd_id128_get_machine.xml | 21 +++++++++++++++----- src/libsystemd/libsystemd.sym | 1 + src/libsystemd/sd-id128/sd-id128.c | 13 ++++++++++++ src/systemd/sd-id128.h | 1 + src/test/test-id128.c | 32 +++++++++++++++++++++++++++++- 5 files changed, 62 insertions(+), 6 deletions(-) diff --git a/man/sd_id128_get_machine.xml b/man/sd_id128_get_machine.xml index 6904f2953c6..59f3266e6fe 100644 --- a/man/sd_id128_get_machine.xml +++ b/man/sd_id128_get_machine.xml @@ -63,6 +63,12 @@ sd_id128_t *ret + + int sd_id128_get_invocation_app_specific + sd_id128_t app_id + sd_id128_t *ret + + @@ -126,12 +132,16 @@ for details. The ID is cached internally.
In future a different mechanism to determine the invocation ID may be added. + sd_id128_get_invocation_app_specific() derives an application-specific ID from + the invocation ID. + Note that sd_id128_get_machine_app_specific(), - sd_id128_get_boot(), sd_id128_get_boot_app_specific(), and - sd_id128_get_invocation() always return UUID Variant 1 Version 4 compatible IDs. - sd_id128_get_machine() will also return a UUID Variant 1 Version 4 compatible ID on - new installations but might not on older. It is possible to convert the machine ID non-reversibly into a - UUID Variant 1 Version 4 compatible one. For more information, see + sd_id128_get_boot(), sd_id128_get_boot_app_specific(), + sd_id128_get_invocation() and + sd_id128_get_invocation_app_specific() always return UUID Variant 1 Version 4 + compatible IDs. sd_id128_get_machine() will also return a UUID Variant 1 Version 4 + compatible ID on new installations but might not on older. It is possible to convert the machine ID + non-reversibly into a UUID Variant 1 Version 4 compatible one. For more information, see machine-id5. It is hence guaranteed that these functions will never return the ID consisting of all zero or all one bits (SD_ID128_NULL, SD_ID128_ALLF) — with the possible exception of @@ -262,6 +272,7 @@ As man:sd-id128(3) macro: sd_id128_get_machine_app_specific() was added in version 233. sd_id128_get_boot_app_specific() was added in version 240. sd_id128_get_app_specific() was added in version 255. + sd_id128_get_invocation_app_specific() was added in version 256.
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym index 89de4b37cab..78b44534629 100644 --- a/src/libsystemd/libsystemd.sym +++ b/src/libsystemd/libsystemd.sym @@ -839,6 +839,7 @@ LIBSYSTEMD_256 { global: sd_bus_creds_get_pidfd_dup; sd_bus_creds_new_from_pidfd; + sd_id128_get_invocation_app_specific; sd_journal_stream_fd_with_namespace; sd_event_source_get_inotify_path; } LIBSYSTEMD_255; diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c index 4336d3f1b70..62b8aaa347d 100644 --- a/src/libsystemd/sd-id128/sd-id128.c +++ b/src/libsystemd/sd-id128/sd-id128.c @@ -390,3 +390,16 @@ _public_ int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret) return sd_id128_get_app_specific(id, app_id, ret); } + +_public_ int sd_id128_get_invocation_app_specific(sd_id128_t app_id, sd_id128_t *ret) { + sd_id128_t id; + int r; + + assert_return(ret, -EINVAL); + + r = sd_id128_get_invocation(&id); + if (r < 0) + return r; + + return sd_id128_get_app_specific(id, app_id, ret); +} diff --git a/src/systemd/sd-id128.h b/src/systemd/sd-id128.h index a984a9d85e1..a9210526b6a 100644 --- a/src/systemd/sd-id128.h +++ b/src/systemd/sd-id128.h @@ -53,6 +53,7 @@ int sd_id128_get_invocation(sd_id128_t *ret); int sd_id128_get_app_specific(sd_id128_t base, sd_id128_t app_id, sd_id128_t *ret); int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret); int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret); +int sd_id128_get_invocation_app_specific(sd_id128_t app_id, sd_id128_t *ret); #define SD_ID128_ARRAY(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \ { .bytes = { 0x##v0, 0x##v1, 0x##v2, 0x##v3, 0x##v4, 0x##v5, 0x##v6, 0x##v7, \ diff --git a/src/test/test-id128.c b/src/test/test-id128.c index 3ddbeec0fc7..48fdbba6c7d 100644 --- a/src/test/test-id128.c +++ b/src/test/test-id128.c @@ -199,7 +199,7 @@ TEST(id128) { } TEST(sd_id128_get_invocation) { - sd_id128_t id; + 
sd_id128_t id = SD_ID128_NULL; int r; /* Query the invocation ID */ @@ -208,6 +208,36 @@ TEST(sd_id128_get_invocation) { log_warning_errno(r, "Failed to get invocation ID, ignoring: %m"); else log_info("Invocation ID: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id)); + + sd_id128_t appid = SD_ID128_NULL; + r = sd_id128_get_invocation_app_specific(SD_ID128_MAKE(59,36,e9,92,fd,11,42,fe,87,c9,e9,b5,6c,9e,4f,04), &appid); + if (r < 0) + log_warning_errno(r, "Failed to get invocation ID, ignoring: %m"); + else { + assert(!sd_id128_equal(id, appid)); + log_info("Per-App Invocation ID: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(appid)); + } + + sd_id128_t appid2 = SD_ID128_NULL; + r = sd_id128_get_invocation_app_specific(SD_ID128_MAKE(59,36,e9,92,fd,11,42,fe,87,c9,e9,b5,6c,9e,4f,05), &appid2); /* slightly different appid */ + if (r < 0) + log_warning_errno(r, "Failed to get invocation ID, ignoring: %m"); + else { + assert(!sd_id128_equal(id, appid2)); + assert(!sd_id128_equal(appid, appid2)); + log_info("Per-App Invocation ID 2: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(appid2)); + } + + sd_id128_t appid3 = SD_ID128_NULL; + r = sd_id128_get_invocation_app_specific(SD_ID128_MAKE(59,36,e9,92,fd,11,42,fe,87,c9,e9,b5,6c,9e,4f,04), &appid3); /* same appid as before */ + if (r < 0) + log_warning_errno(r, "Failed to get invocation ID, ignoring: %m"); + else { + assert(!sd_id128_equal(id, appid3)); + assert(sd_id128_equal(appid, appid3)); + assert(!sd_id128_equal(appid2, appid3)); + log_info("Per-App Invocation ID 3: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(appid3)); + } } TEST(benchmark_sd_id128_get_machine_app_specific) { From 9573c0ba569e8b4cc237776a7008979c23719fac Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 19 Apr 2024 14:54:12 +0200 Subject: [PATCH 5/7] vmspawn: enable vmgenid for all VMs This passes an ID derived from the vmgenid down to all VMs. This is useful to have an identifier for this VM generation id. 
We derive it from the invocation ID, if we have one, otherwise we randomize it. Eventually we should make use of the vmgenid changing to re-acquire MAC addresses, DHCP leases as such. Let's for now enable the VMM side of the concept as first step towards that. --- src/vmspawn/vmspawn.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 2ebb158640e..9366ce111da 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -1294,6 +1294,24 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (strv_extend_many(&cmdline, "-uuid", SD_ID128_TO_UUID_STRING(arg_uuid)) < 0) return log_oom(); + /* Derive a vmgenid automatically from the invocation ID, in a deterministic way. */ + sd_id128_t vmgenid; + r = sd_id128_get_invocation_app_specific(SD_ID128_MAKE(bd,84,6d,e3,e4,7d,4b,6c,a6,85,4a,87,0f,3c,a3,a0), &vmgenid); + if (r < 0) { + log_debug_errno(r, "Failed to get invocation ID, making up randomized vmgenid: %m"); + + r = sd_id128_randomize(&vmgenid); + if (r < 0) + return log_error_errno(r, "Failed to make up randomized vmgenid: %m"); + } + + _cleanup_free_ char *vmgenid_device = NULL; + if (asprintf(&vmgenid_device, "vmgenid,guid=" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(vmgenid)) < 0) + return log_oom(); + + if (strv_extend_many(&cmdline, "-device", vmgenid_device) < 0) + return log_oom(); + /* if we are going to be starting any units with state then create our runtime dir */ if (arg_tpm != 0 || arg_directory || arg_runtime_mounts.n_mounts != 0) { r = runtime_directory(&arg_runtime_directory, arg_privileged ? 
RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, "systemd/vmspawn"); From 895cf7015dce4b38de67253fd0932f372e2212a9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 19 Apr 2024 14:59:37 +0200 Subject: [PATCH 6/7] update TODO --- TODO | 3 --- 1 file changed, 3 deletions(-) diff --git a/TODO b/TODO index 830d27a716c..548a7c31fc5 100644 --- a/TODO +++ b/TODO @@ -329,10 +329,7 @@ Features: PCRs. * vmspawn: - - enable hyperv extension by default (https://www.qemu.org/docs/master/system/i386/hyperv.html) - - register with machined - run in scope unit when invoked from command line, and machined registration is off - - support --directory= via virtiofs - sd_notify support - --ephemeral support - --read-only support From 8653ef5ca2310083eb128421a8b67fa9b80b4125 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 20 Apr 2024 12:09:54 +0200 Subject: [PATCH 7/7] update NEWS --- NEWS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/NEWS b/NEWS index 240c12e9cf0..0c0212a2ecf 100644 --- a/NEWS +++ b/NEWS @@ -603,6 +603,12 @@ CHANGES WITH 256-rc1: --ssh-key-type= to optionally set up transient SSH keys to pass to the invoked VMs in order to be able to SSH into them once booted. + * systemd-vmspawn will now enable various "HyperV enlightenments" and + the "VM Generation ID" on the VMs. + + * A new environment variable $SYSTEMD_VMSPAWN_QEMU_EXTRA may carry + additional qemu command line options to pass to qemu. + systemd-repart: * systemd-repart gained new options --generate-fstab= and @@ -638,6 +644,10 @@ CHANGES WITH 256-rc1: sd_journal_stream_fd() but creates a log stream targeted at a specific log namespace. + * The sd-id128 API gained a new API call + sd_id128_get_invocation_app_specific() for acquiring an app-specific + ID that is derived from the service invocation ID. + systemd-cryptsetup/systemd-cryptenroll: * systemd-cryptenroll can now enroll directly with a PKCS11 public key