From 6190fd61d4d632cef58409ceac42d2b9882601c6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:13 +0200 Subject: [PATCH 01/63] meson: do not link pixman automatically into all targets The dependency on pixman is listed manually in all sourcesets that need it. There is no need to bring into libqemuutil, since there is nothing in util/ that needs pixman either. Reported-by: Michael Tokarev Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-2-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 91a0aa64c6..8c1271b884 100644 --- a/meson.build +++ b/meson.build @@ -3481,7 +3481,7 @@ util_ss = util_ss.apply({}) libqemuutil = static_library('qemuutil', build_by_default: false, sources: util_ss.sources() + stub_ss.sources() + genh, - dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) + dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc]) qemuutil = declare_dependency(link_with: libqemuutil, sources: genh + version_res, dependencies: [event_loop_base]) From d04c7e5535e97e4a1a55d05cfe632b8c66369b36 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:14 +0200 Subject: [PATCH 02/63] tests: only build plugins if TCG is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no way to use them for testing, if all the available accelerators use hardware virtualization. Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-3-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- tests/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/meson.build b/tests/meson.build index 0a6f96f8f8..acb6807094 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -78,9 +78,9 @@ subdir('decode') if 'CONFIG_TCG' in config_all_accel subdir('fp') + subdir('plugin') endif -subdir('plugin') subdir('unit') subdir('qapi-schema') subdir('qtest') From bb23c33f9381a8986798deb896cd15c2c0932cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 8 Apr 2024 17:53:15 +0200 Subject: [PATCH 03/63] ebpf: Restrict to system emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eBPF is not used in user emulation. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240404194757.9343-2-philmd@linaro.org> Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-4-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- ebpf/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ebpf/meson.build b/ebpf/meson.build index c5bf9295a2..bff6156f51 100644 --- a/ebpf/meson.build +++ b/ebpf/meson.build @@ -1 +1 @@ -common_ss.add(when: libbpf, if_true: files('ebpf.c', 'ebpf_rss.c'), if_false: files('ebpf_rss-stub.c')) +system_ss.add(when: libbpf, if_true: files('ebpf.c', 'ebpf_rss.c'), if_false: files('ebpf_rss-stub.c')) From 34dca3f5be81897338176cd4bbe54fecbdd5c3af Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:16 +0200 Subject: [PATCH 04/63] tests/unit: match some unit tests to corresponding feature switches Try not to test code that is not used by user mode emulation, or by the block layer, unless they are being compiled; and fix test-timed-average which was not compiled with --disable-system --enable-tools. This is by no means complete, it only touches the more blatantly wrong cases. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-5-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- tests/unit/meson.build | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 228a21d03c..26c109c968 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -18,7 +18,6 @@ tests = { 'test-forward-visitor': [testqapi], 'test-string-input-visitor': [testqapi], 'test-string-output-visitor': [testqapi], - 'test-opts-visitor': [testqapi], 'test-visitor-serialization': [testqapi], 'test-bitmap': [], 'test-resv-mem': [], @@ -46,12 +45,8 @@ tests = { 'test-qemu-opts': [], 'test-keyval': [testqapi], 'test-logging': [], - 'test-uuid': [], - 'ptimer-test': ['ptimer-test-stubs.c', meson.project_source_root() / 'hw/core/ptimer.c'], 'test-qapi-util': [], 'test-interval-tree': [], - 'test-xs-node': [qom], - 'test-virtio-dmabuf': [meson.project_source_root() / 'hw/display/virtio-dmabuf.c'], } if have_system or have_tools @@ -97,6 +92,8 @@ if have_block 'test-crypto-ivgen': [io], 'test-crypto-afsplit': [io], 'test-crypto-block': [io], + 'test-timed-average': [], + 'test-uuid': [], } if gnutls.found() and \ tasn1.found() and \ @@ -131,10 +128,13 @@ endif if have_system tests += { + 'ptimer-test': ['ptimer-test-stubs.c', meson.project_source_root() / 'hw/core/ptimer.c'], 'test-iov': [], + 'test-opts-visitor': [testqapi], + 'test-xs-node': [qom], + 'test-virtio-dmabuf': [meson.project_source_root() / 'hw/display/virtio-dmabuf.c'], 'test-qmp-cmds': [testqapi], 'test-xbzrle': [migration], - 'test-timed-average': [], 'test-util-sockets': ['socket-helpers.c'], 'test-base64': [], 'test-bufferiszero': [], From b9ad27a9a4040805e9bbae95853db41f33285ffa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:17 +0200 Subject: [PATCH 05/63] yank: only build if needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The yank feature is not used in user emulation. Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-6-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- util/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/meson.build b/util/meson.build index 0ef9886be0..2ad57b10ba 100644 --- a/util/meson.build +++ b/util/meson.build @@ -60,7 +60,6 @@ util_ss.add(files('stats64.c')) util_ss.add(files('systemd.c')) util_ss.add(files('transactions.c')) util_ss.add(files('guest-random.c')) -util_ss.add(files('yank.c')) util_ss.add(files('int128.c')) util_ss.add(files('memalign.c')) util_ss.add(files('interval-tree.c')) @@ -117,6 +116,7 @@ if have_block util_ss.add(files('vfio-helpers.c')) util_ss.add(files('chardev_open.c')) endif + util_ss.add(files('yank.c')) endif if cpu == 'aarch64' From 3df4c28860e100011db3a51a8a331506a3fe51f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 8 Apr 2024 17:53:18 +0200 Subject: [PATCH 06/63] util/qemu-config: Extract QMP commands to qemu-config-qmp.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QMP is irrelevant for user emulation. Extract the code related to QMP in a different source file, which won't be build for user emulation binaries. This avoid pulling pointless code. Signed-off-by: Philippe Mathieu-Daudé Message-ID: <20240404194757.9343-5-philmd@linaro.org> Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-7-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/config-file.h | 3 + monitor/meson.build | 1 + monitor/qemu-config-qmp.c | 206 +++++++++++++++++++++++++++++++++++++ util/qemu-config.c | 204 +----------------------------------- 4 files changed, 212 insertions(+), 202 deletions(-) create mode 100644 monitor/qemu-config-qmp.c diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h index b82a778123..51b310fa3b 100644 --- a/include/qemu/config-file.h +++ b/include/qemu/config-file.h @@ -8,6 +8,9 @@ QemuOptsList *qemu_find_opts(const char *group); QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); QemuOpts *qemu_find_opts_singleton(const char *group); +extern QemuOptsList *vm_config_groups[]; +extern QemuOptsList *drive_config_groups[]; + void qemu_add_opts(QemuOptsList *list); void qemu_add_drive_opts(QemuOptsList *list); int qemu_global_option(const char *str); diff --git a/monitor/meson.build b/monitor/meson.build index 5269492cf0..a71523a1ce 100644 --- a/monitor/meson.build +++ b/monitor/meson.build @@ -4,6 +4,7 @@ system_ss.add(files( 'fds.c', 'hmp-cmds.c', 'hmp.c', + 'qemu-config-qmp.c', )) system_ss.add([spice_headers, files('qmp-cmds.c')]) diff --git a/monitor/qemu-config-qmp.c b/monitor/qemu-config-qmp.c new file mode 100644 index 0000000000..24477a0e44 --- /dev/null +++ b/monitor/qemu-config-qmp.c @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc.h" +#include "qapi/qmp/qlist.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "hw/boards.h" + +static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc) +{ + CommandLineParameterInfoList *param_list = NULL; + CommandLineParameterInfo *info; + int i; + + for (i = 0; desc[i].name != NULL; i++) { + info = g_malloc0(sizeof(*info)); + info->name = g_strdup(desc[i].name); + + switch (desc[i].type) { + case QEMU_OPT_STRING: + info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; + break; + case QEMU_OPT_BOOL: + info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN; + break; + case QEMU_OPT_NUMBER: + info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER; + break; + case QEMU_OPT_SIZE: + info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE; + break; + } + + info->help = g_strdup(desc[i].help); + info->q_default = g_strdup(desc[i].def_value_str); + + QAPI_LIST_PREPEND(param_list, info); + } + + return param_list; +} + +/* remove repeated entry from the info list */ +static void cleanup_infolist(CommandLineParameterInfoList *head) +{ + CommandLineParameterInfoList *pre_entry, *cur, *del_entry; + + cur = head; + while (cur->next) { + pre_entry = head; + while (pre_entry != cur->next) { + if (!strcmp(pre_entry->value->name, cur->next->value->name)) { + del_entry = cur->next; + cur->next = cur->next->next; + del_entry->next = NULL; + qapi_free_CommandLineParameterInfoList(del_entry); + break; + } + pre_entry = pre_entry->next; + } + cur = cur->next; + } +} + +/* merge the description items of two parameter infolists */ +static void connect_infolist(CommandLineParameterInfoList *head, + CommandLineParameterInfoList *new) +{ + CommandLineParameterInfoList *cur; + + cur = head; + while (cur->next) { + cur = cur->next; + } + cur->next = new; +} + +/* access all the local QemuOptsLists for drive option */ +static CommandLineParameterInfoList *get_drive_infolist(void) +{ + CommandLineParameterInfoList *head = NULL, *cur; + int i; + + for (i = 0; drive_config_groups[i] != NULL; i++) { + if (!head) { + head = query_option_descs(drive_config_groups[i]->desc); + } else { + cur = query_option_descs(drive_config_groups[i]->desc); + connect_infolist(head, cur); + } + } + cleanup_infolist(head); + + return head; +} + +static CommandLineParameterInfo *objprop_to_cmdline_prop(ObjectProperty *prop) +{ + CommandLineParameterInfo *info; + + info = g_malloc0(sizeof(*info)); + info->name = g_strdup(prop->name); + + if (g_str_equal(prop->type, "bool") || g_str_equal(prop->type, "OnOffAuto")) { + info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN; + } else if (g_str_equal(prop->type, "int")) { + info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER; + } else if (g_str_equal(prop->type, "size")) { + info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE; + } else { + info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; + } + + if (prop->description) { + info->help = g_strdup(prop->description); + } + + return info; +} + +static CommandLineParameterInfoList *query_all_machine_properties(void) +{ + CommandLineParameterInfoList *params = NULL, *clpiter; + CommandLineParameterInfo *info; + GSList *machines, *curr_mach; + ObjectPropertyIterator op_iter; + ObjectProperty *prop; + bool is_new; + + machines = object_class_get_list(TYPE_MACHINE, false); + assert(machines); + + /* Loop over all machine classes */ + for (curr_mach = machines; curr_mach; curr_mach = curr_mach->next) { + object_class_property_iter_init(&op_iter, curr_mach->data); + /* ... and over the properties of each machine: */ + while ((prop = object_property_iter_next(&op_iter))) { + if (!prop->set) { + continue; + } + /* + * Check whether the property has already been put into the list + * (via another machine class) + */ + is_new = true; + for (clpiter = params; clpiter != NULL; clpiter = clpiter->next) { + if (g_str_equal(clpiter->value->name, prop->name)) { + is_new = false; + break; + } + } + /* If it hasn't been added before, add it now to the list */ + if (is_new) { + info = objprop_to_cmdline_prop(prop); + QAPI_LIST_PREPEND(params, info); + } + } + } + + g_slist_free(machines); + + /* Add entry for the "type" parameter */ + info = g_malloc0(sizeof(*info)); + info->name = g_strdup("type"); + info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; + info->help = g_strdup("machine type"); + QAPI_LIST_PREPEND(params, info); + + return params; +} + +CommandLineOptionInfoList *qmp_query_command_line_options(const char *option, + Error **errp) +{ + CommandLineOptionInfoList *conf_list = NULL; + CommandLineOptionInfo *info; + int i; + + for (i = 0; vm_config_groups[i] != NULL; i++) { + if (!option || !strcmp(option, vm_config_groups[i]->name)) { + info = g_malloc0(sizeof(*info)); + info->option = g_strdup(vm_config_groups[i]->name); + if (!strcmp("drive", vm_config_groups[i]->name)) { + info->parameters = get_drive_infolist(); + } else { + info->parameters = + query_option_descs(vm_config_groups[i]->desc); + } + QAPI_LIST_PREPEND(conf_list, info); + } + } + + if (!option || !strcmp(option, "machine")) { + info = g_malloc0(sizeof(*info)); + info->option = g_strdup("machine"); + info->parameters = query_all_machine_properties(); + QAPI_LIST_PREPEND(conf_list, info); + } + + if (conf_list == NULL) { + error_setg(errp, "invalid option name: %s", option); + } + + return conf_list; +} diff --git a/util/qemu-config.c b/util/qemu-config.c index 42076efe1e..a90c18dad2 100644 --- a/util/qemu-config.c +++ b/util/qemu-config.c @@ -1,16 +1,14 @@ #include "qemu/osdep.h" #include "block/qdict.h" /* for qdict_extract_subqdict() */ #include "qapi/error.h" -#include "qapi/qapi-commands-misc.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qlist.h" #include "qemu/error-report.h" #include "qemu/option.h" #include "qemu/config-file.h" -#include "hw/boards.h" -static QemuOptsList *vm_config_groups[48]; -static QemuOptsList *drive_config_groups[5]; +QemuOptsList *vm_config_groups[48]; +QemuOptsList *drive_config_groups[5]; static QemuOptsList *find_list(QemuOptsList **lists, const char *group, Error **errp) @@ -55,204 +53,6 @@ QemuOpts *qemu_find_opts_singleton(const char *group) return opts; } -static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc) -{ - CommandLineParameterInfoList *param_list = NULL; - CommandLineParameterInfo *info; - int i; - - for (i = 0; desc[i].name != NULL; i++) { - info = g_malloc0(sizeof(*info)); - info->name = g_strdup(desc[i].name); - - switch (desc[i].type) { - case QEMU_OPT_STRING: - info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; - break; - case QEMU_OPT_BOOL: - info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN; - break; - case QEMU_OPT_NUMBER: - info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER; - break; - case QEMU_OPT_SIZE: - info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE; - break; - } - - info->help = g_strdup(desc[i].help); - info->q_default = g_strdup(desc[i].def_value_str); - - QAPI_LIST_PREPEND(param_list, info); - } - - return param_list; -} - -/* remove repeated entry from the info list */ -static void cleanup_infolist(CommandLineParameterInfoList *head) -{ - CommandLineParameterInfoList *pre_entry, *cur, *del_entry; - - cur = head; - while (cur->next) { - pre_entry = head; - while (pre_entry != cur->next) { - if (!strcmp(pre_entry->value->name, cur->next->value->name)) { - del_entry = cur->next; - cur->next = cur->next->next; - del_entry->next = NULL; - qapi_free_CommandLineParameterInfoList(del_entry); - break; - } - pre_entry = pre_entry->next; - } - cur = cur->next; - } -} - -/* merge the description items of two parameter infolists */ -static void connect_infolist(CommandLineParameterInfoList *head, - CommandLineParameterInfoList *new) -{ - CommandLineParameterInfoList *cur; - - cur = head; - while (cur->next) { - cur = cur->next; - } - cur->next = new; -} - -/* access all the local QemuOptsLists for drive option */ -static CommandLineParameterInfoList *get_drive_infolist(void) -{ - CommandLineParameterInfoList *head = NULL, *cur; - int i; - - for (i = 0; drive_config_groups[i] != NULL; i++) { - if (!head) { - head = query_option_descs(drive_config_groups[i]->desc); - } else { - cur = query_option_descs(drive_config_groups[i]->desc); - connect_infolist(head, cur); - } - } - cleanup_infolist(head); - - return head; -} - -static CommandLineParameterInfo *objprop_to_cmdline_prop(ObjectProperty *prop) -{ - CommandLineParameterInfo *info; - - info = g_malloc0(sizeof(*info)); - info->name = g_strdup(prop->name); - - if (g_str_equal(prop->type, "bool") || g_str_equal(prop->type, "OnOffAuto")) { - info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN; - } else if (g_str_equal(prop->type, "int")) { - info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER; - } else if (g_str_equal(prop->type, "size")) { - info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE; - } else { - info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; - } - - if (prop->description) { - info->help = g_strdup(prop->description); - } - - return info; -} - -static CommandLineParameterInfoList *query_all_machine_properties(void) -{ - CommandLineParameterInfoList *params = NULL, *clpiter; - CommandLineParameterInfo *info; - GSList *machines, *curr_mach; - ObjectPropertyIterator op_iter; - ObjectProperty *prop; - bool is_new; - - machines = object_class_get_list(TYPE_MACHINE, false); - assert(machines); - - /* Loop over all machine classes */ - for (curr_mach = machines; curr_mach; curr_mach = curr_mach->next) { - object_class_property_iter_init(&op_iter, curr_mach->data); - /* ... and over the properties of each machine: */ - while ((prop = object_property_iter_next(&op_iter))) { - if (!prop->set) { - continue; - } - /* - * Check whether the property has already been put into the list - * (via another machine class) - */ - is_new = true; - for (clpiter = params; clpiter != NULL; clpiter = clpiter->next) { - if (g_str_equal(clpiter->value->name, prop->name)) { - is_new = false; - break; - } - } - /* If it hasn't been added before, add it now to the list */ - if (is_new) { - info = objprop_to_cmdline_prop(prop); - QAPI_LIST_PREPEND(params, info); - } - } - } - - g_slist_free(machines); - - /* Add entry for the "type" parameter */ - info = g_malloc0(sizeof(*info)); - info->name = g_strdup("type"); - info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; - info->help = g_strdup("machine type"); - QAPI_LIST_PREPEND(params, info); - - return params; -} - -CommandLineOptionInfoList *qmp_query_command_line_options(const char *option, - Error **errp) -{ - CommandLineOptionInfoList *conf_list = NULL; - CommandLineOptionInfo *info; - int i; - - for (i = 0; vm_config_groups[i] != NULL; i++) { - if (!option || !strcmp(option, vm_config_groups[i]->name)) { - info = g_malloc0(sizeof(*info)); - info->option = g_strdup(vm_config_groups[i]->name); - if (!strcmp("drive", vm_config_groups[i]->name)) { - info->parameters = get_drive_infolist(); - } else { - info->parameters = - query_option_descs(vm_config_groups[i]->desc); - } - QAPI_LIST_PREPEND(conf_list, info); - } - } - - if (!option || !strcmp(option, "machine")) { - info = g_malloc0(sizeof(*info)); - info->option = g_strdup("machine"); - info->parameters = query_all_machine_properties(); - QAPI_LIST_PREPEND(conf_list, info); - } - - if (conf_list == NULL) { - error_setg(errp, "invalid option name: %s", option); - } - - return conf_list; -} - QemuOptsList *qemu_find_opts_err(const char *group, Error **errp) { return find_list(vm_config_groups, group, errp); From 971febb8f5e810c2d167ac9cb5bd1cdaf6ca688d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:19 +0200 Subject: [PATCH 07/63] hw/core: Move system emulation files to system_ss hotplug.c, qdev-hotplug.c and reset.c are not used by user emulation and need not be included in hwcore_ss. Move them to system_ss, where they belong, by letting the linker pull in the stubs when needed. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-8-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- hw/core/meson.build | 14 +++----------- {hw/core => stubs}/hotplug-stubs.c | 0 stubs/meson.build | 1 + 3 files changed, 4 insertions(+), 11 deletions(-) rename {hw/core => stubs}/hotplug-stubs.c (100%) diff --git a/hw/core/meson.build b/hw/core/meson.build index e26f2e088c..f20d4143f7 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -3,7 +3,6 @@ hwcore_ss.add(files( 'bus.c', 'qdev-properties.c', 'qdev.c', - 'reset.c', 'resetcontainer.c', 'resettable.c', 'vmstate-if.c', @@ -12,16 +11,6 @@ hwcore_ss.add(files( 'clock.c', 'qdev-clock.c', )) -if have_system - hwcore_ss.add(files( - 'hotplug.c', - 'qdev-hotplug.c', - )) -else - hwcore_ss.add(files( - 'hotplug-stubs.c', - )) -endif common_ss.add(files('cpu-common.c')) common_ss.add(files('machine-smp.c')) @@ -40,6 +29,7 @@ system_ss.add(files( 'cpu-sysemu.c', 'fw-path-provider.c', 'gpio.c', + 'hotplug.c', 'loader.c', 'machine-hmp-cmds.c', 'machine-qmp-cmds.c', @@ -48,7 +38,9 @@ system_ss.add(files( 'null-machine.c', 'numa.c', 'qdev-fw.c', + 'qdev-hotplug.c', 'qdev-properties-system.c', + 'reset.c', 'sysbus.c', 'vm-change-state-handler.c', 'clock-vmstate.c', diff --git a/hw/core/hotplug-stubs.c b/stubs/hotplug-stubs.c similarity index 100% rename from hw/core/hotplug-stubs.c rename to stubs/hotplug-stubs.c diff --git a/stubs/meson.build b/stubs/meson.build index 0bf25e6ca5..f87f5c1110 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -14,6 +14,7 @@ stub_ss.add(files('fdset.c')) stub_ss.add(files('gdbstub.c')) stub_ss.add(files('get-vm-name.c')) stub_ss.add(files('graph-lock.c')) +stub_ss.add(files('hotplug-stubs.c')) if linux_io_uring.found() stub_ss.add(files('io_uring.c')) endif From 68621262bd4317dfcdd511d8005b8558c3ddc353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 8 Apr 2024 17:53:20 +0200 Subject: [PATCH 08/63] hw: Include minimal source set in user emulation build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the files in hwcore_ss[] are required to link a user emulation binary. Have meson process the hw/ sub-directories if system emulation is selected, otherwise directly process hw/core/ to get hwcore_ss[], which is the only set required by user emulation. This removes about 10% from the time needed to run "../configure --disable-system --disable-tools --disable-guest-agent". Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240404194757.9343-8-philmd@linaro.org> Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-9-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- meson.build | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 8c1271b884..84e59dcbb4 100644 --- a/meson.build +++ b/meson.build @@ -3451,8 +3451,12 @@ subdir('qom') subdir('authz') subdir('crypto') subdir('ui') -subdir('hw') subdir('gdbstub') +if have_system + subdir('hw') +else + subdir('hw/core') +endif if enable_modules libmodulecommon = static_library('module-common', files('module-common.c') + genh, pic: true, c_args: '-DBUILD_DSO') From d4d0ebb7da0489a2fff8c7150a38fa897ca3eea5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:21 +0200 Subject: [PATCH 09/63] stubs: remove obsolete stubs These file define functions are are not called from common code anymore. Delete those functions and, if applicable, the entire files. Signed-off-by: Paolo Bonzini Acked-by: Richard Henderson Message-ID: <20240408155330.522792-10-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- include/sysemu/sysemu.h | 2 -- stubs/isa-bus.c | 7 ------- stubs/meson.build | 3 --- stubs/module-opts.c | 2 -- stubs/monitor-core.c | 6 ------ stubs/pci-bus.c | 7 ------- stubs/qdev.c | 6 ------ stubs/qtest.c | 10 ---------- stubs/usb-dev-stub.c | 5 ----- 9 files changed, 48 deletions(-) delete mode 100644 stubs/isa-bus.c delete mode 100644 stubs/module-opts.c delete mode 100644 stubs/pci-bus.c diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index eb1dc1e4ed..5b4397eeb8 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -71,8 +71,6 @@ Chardev *serial_hd(int i); extern Chardev *parallel_hds[MAX_PARALLEL_PORTS]; -void hmp_info_usb(Monitor *mon, const QDict *qdict); - void add_boot_device_path(int32_t bootindex, DeviceState *dev, const char *suffix); char *get_boot_devices_list(size_t *size); diff --git a/stubs/isa-bus.c b/stubs/isa-bus.c deleted file mode 100644 index 522f448997..0000000000 --- a/stubs/isa-bus.c +++ /dev/null @@ -1,7 +0,0 @@ -#include "qemu/osdep.h" -#include "hw/isa/isa.h" - -ISADevice *isa_create_simple(ISABus *bus, const char *name) -{ - g_assert_not_reached(); -} diff --git a/stubs/meson.build b/stubs/meson.build index f87f5c1110..aa7120f711 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -22,13 +22,11 @@ stub_ss.add(files('iothread-lock.c')) if have_block stub_ss.add(files('iothread-lock-block.c')) endif -stub_ss.add(files('isa-bus.c')) stub_ss.add(files('is-daemonized.c')) if libaio.found() stub_ss.add(files('linux-aio.c')) endif stub_ss.add(files('migr-blocker.c')) -stub_ss.add(files('module-opts.c')) stub_ss.add(files('monitor.c')) stub_ss.add(files('monitor-core.c')) stub_ss.add(files('physmem.c')) @@ -57,7 +55,6 @@ if have_block or have_ga endif if have_system stub_ss.add(files('fw_cfg.c')) - stub_ss.add(files('pci-bus.c')) stub_ss.add(files('semihost.c')) stub_ss.add(files('usb-dev-stub.c')) stub_ss.add(files('xen-hw-stub.c')) diff --git a/stubs/module-opts.c b/stubs/module-opts.c deleted file mode 100644 index 5412429ea8..0000000000 --- a/stubs/module-opts.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "qemu/osdep.h" -#include "qemu/config-file.h" diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c index afa477aae6..1894cdfe1f 100644 --- a/stubs/monitor-core.c +++ b/stubs/monitor-core.c @@ -12,10 +12,6 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon) return NULL; } -void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp) -{ -} - void qapi_event_emit(QAPIEvent event, QDict *qdict) { } @@ -24,5 +20,3 @@ int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) { abort(); } - - diff --git a/stubs/pci-bus.c b/stubs/pci-bus.c deleted file mode 100644 index a8932fa932..0000000000 --- a/stubs/pci-bus.c +++ /dev/null @@ -1,7 +0,0 @@ -#include "qemu/osdep.h" -#include "hw/pci/pci.h" - -PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name) -{ - g_assert_not_reached(); -} diff --git a/stubs/qdev.c b/stubs/qdev.c index 6869f6f90a..7e957b3e52 100644 --- a/stubs/qdev.c +++ b/stubs/qdev.c @@ -20,9 +20,3 @@ void qapi_event_send_device_deleted(const char *device, { /* Nothing to do. */ } - -void qapi_event_send_device_unplug_guest_error(const char *device, - const char *path) -{ - /* Nothing to do. */ -} diff --git a/stubs/qtest.c b/stubs/qtest.c index 4666a49d7d..39e376eb67 100644 --- a/stubs/qtest.c +++ b/stubs/qtest.c @@ -13,13 +13,3 @@ /* Needed for qtest_allowed() */ bool qtest_allowed; - -bool qtest_driver(void) -{ - return false; -} - -int64_t qtest_get_virtual_clock(void) -{ - return 0; -} diff --git a/stubs/usb-dev-stub.c b/stubs/usb-dev-stub.c index aa557692b7..fcabe8429e 100644 --- a/stubs/usb-dev-stub.c +++ b/stubs/usb-dev-stub.c @@ -26,8 +26,3 @@ HumanReadableText *qmp_x_query_usb(Error **errp) error_setg(errp, "Support for USB devices not built-in"); return NULL; } - -void hmp_info_usb(Monitor *mon, const QDict *qdict) -{ - monitor_printf(mon, "Support for USB devices not built-in\n"); -} From 89857312f3245e589b161eb535261bae7b7fcce2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:22 +0200 Subject: [PATCH 10/63] hw/usb: move stubs out of stubs/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the USB stubs are needed exactly when the Kconfig symbols are not enabled, they can be placed in hw/usb/ and conditionalized on CONFIG_USB. Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-11-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- stubs/usb-dev-stub.c => hw/usb/bus-stub.c | 0 hw/usb/meson.build | 2 +- stubs/meson.build | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) rename stubs/usb-dev-stub.c => hw/usb/bus-stub.c (100%) diff --git a/stubs/usb-dev-stub.c b/hw/usb/bus-stub.c similarity index 100% rename from stubs/usb-dev-stub.c rename to hw/usb/bus-stub.c diff --git a/hw/usb/meson.build b/hw/usb/meson.build index aac3bb35f2..23f7f7acb5 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -9,7 +9,7 @@ system_ss.add(when: 'CONFIG_USB', if_true: files( 'desc-msos.c', 'libhw.c', 'pcap.c', -)) +), if_false: files('bus-stub.c')) # usb host adapters system_ss.add(when: 'CONFIG_USB_UHCI', if_true: files('hcd-uhci.c')) diff --git a/stubs/meson.build b/stubs/meson.build index aa7120f711..45616afbfa 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -56,7 +56,6 @@ endif if have_system stub_ss.add(files('fw_cfg.c')) stub_ss.add(files('semihost.c')) - stub_ss.add(files('usb-dev-stub.c')) stub_ss.add(files('xen-hw-stub.c')) stub_ss.add(files('virtio-md-pci.c')) else From f2604d8508a12a2060e854283fa076c9f09d1d10 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:23 +0200 Subject: [PATCH 11/63] hw/virtio: move stubs out of stubs/ Since the virtio memory device stubs are needed exactly when the Kconfig symbol is not enabled, they can be placed in hw/virtio/ and conditionalized on CONFIG_VIRTIO_MD. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-12-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- hw/virtio/meson.build | 2 ++ stubs/virtio-md-pci.c => hw/virtio/virtio-md-stubs.c | 0 stubs/meson.build | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) rename stubs/virtio-md-pci.c => hw/virtio/virtio-md-stubs.c (100%) diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index d7f18c96e6..621fc65454 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -87,6 +87,8 @@ specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('virtio-stub.c')) +system_ss.add(when: 'CONFIG_VIRTIO_MD', if_false: files('virtio-md-stubs.c')) + system_ss.add(files('virtio-hmp-cmds.c')) specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: specific_virtio_ss) diff --git a/stubs/virtio-md-pci.c b/hw/virtio/virtio-md-stubs.c similarity index 100% rename from stubs/virtio-md-pci.c rename to hw/virtio/virtio-md-stubs.c diff --git a/stubs/meson.build b/stubs/meson.build index 45616afbfa..60e32d363f 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -57,7 +57,6 @@ if have_system stub_ss.add(files('fw_cfg.c')) stub_ss.add(files('semihost.c')) stub_ss.add(files('xen-hw-stub.c')) - stub_ss.add(files('virtio-md-pci.c')) else stub_ss.add(files('qdev.c')) endif From 5837db465053f57414f671448b370a4b29250bae Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:24 +0200 Subject: [PATCH 12/63] semihosting: move stubs out of stubs/ Since the semihosting stubs are needed exactly when the Kconfig symbols are not needed, move them to semihosting/ and conditionalize them on CONFIG_SEMIHOSTING and/or CONFIG_SYSTEM_ONLY. Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-13-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- semihosting/meson.build | 3 +++ stubs/semihost-all.c => semihosting/stubs-all.c | 0 stubs/semihost.c => semihosting/stubs-system.c | 0 stubs/meson.build | 2 -- 4 files changed, 3 insertions(+), 2 deletions(-) rename stubs/semihost-all.c => semihosting/stubs-all.c (100%) rename stubs/semihost.c => semihosting/stubs-system.c (100%) diff --git a/semihosting/meson.build b/semihosting/meson.build index b07cbd980f..34933e5a19 100644 --- a/semihosting/meson.build +++ b/semihosting/meson.build @@ -9,5 +9,8 @@ specific_ss.add(when: ['CONFIG_SEMIHOSTING', 'CONFIG_SYSTEM_ONLY'], if_true: fil 'uaccess.c', )) +common_ss.add(when: ['CONFIG_SEMIHOSTING', 'CONFIG_SYSTEM_ONLY'], if_false: files('stubs-all.c')) +system_ss.add(when: ['CONFIG_SEMIHOSTING'], if_false: files('stubs-system.c')) + specific_ss.add(when: ['CONFIG_ARM_COMPATIBLE_SEMIHOSTING'], if_true: files('arm-compat-semi.c')) diff --git a/stubs/semihost-all.c b/semihosting/stubs-all.c similarity index 100% rename from stubs/semihost-all.c rename to semihosting/stubs-all.c diff --git a/stubs/semihost.c b/semihosting/stubs-system.c similarity index 100% rename from stubs/semihost.c rename to semihosting/stubs-system.c diff --git a/stubs/meson.build b/stubs/meson.build index 60e32d363f..84ecaa4daa 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -55,9 +55,7 @@ if have_block or have_ga endif if have_system stub_ss.add(files('fw_cfg.c')) - stub_ss.add(files('semihost.c')) stub_ss.add(files('xen-hw-stub.c')) else stub_ss.add(files('qdev.c')) endif -stub_ss.add(files('semihost-all.c')) From 5643190b74df601719ca713ea21eea4997a26e78 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:25 +0200 Subject: [PATCH 13/63] ramfb: move stubs out of stubs/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the ramfb stubs are needed exactly when the Kconfig symbols are not needed, move them to hw/display/ and compile them when ramfb.c is absent. Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-14-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- hw/display/meson.build | 2 +- stubs/ramfb.c => hw/display/ramfb-stubs.c | 0 stubs/meson.build | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) rename stubs/ramfb.c => hw/display/ramfb-stubs.c (100%) diff --git a/hw/display/meson.build b/hw/display/meson.build index f93a69f70f..4751aab3ba 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -3,7 +3,7 @@ hw_display_modules = {} system_ss.add(when: 'CONFIG_DDC', if_true: files('i2c-ddc.c')) system_ss.add(when: 'CONFIG_EDID', if_true: files('edid-generate.c', 'edid-region.c')) -system_ss.add(when: 'CONFIG_FW_CFG_DMA', if_true: files('ramfb.c')) +system_ss.add(when: 'CONFIG_FW_CFG_DMA', if_true: files('ramfb.c'), if_false: files('ramfb-stubs.c')) system_ss.add(when: 'CONFIG_FW_CFG_DMA', if_true: files('ramfb-standalone.c')) system_ss.add(when: 'CONFIG_VGA_CIRRUS', if_true: files('cirrus_vga.c')) diff --git a/stubs/ramfb.c b/hw/display/ramfb-stubs.c similarity index 100% rename from stubs/ramfb.c rename to hw/display/ramfb-stubs.c diff --git a/stubs/meson.build b/stubs/meson.build index 84ecaa4daa..92887660e4 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -36,7 +36,6 @@ stub_ss.add(files('qmp-command-available.c')) stub_ss.add(files('qmp-quit.c')) stub_ss.add(files('qtest.c')) stub_ss.add(files('ram-block.c')) -stub_ss.add(files('ramfb.c')) stub_ss.add(files('replay.c')) stub_ss.add(files('runstate-check.c')) stub_ss.add(files('sysbus.c')) From 2c888febdfa0290a9e1d1f2133b2a24de47f6120 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:26 +0200 Subject: [PATCH 14/63] memory-device: move stubs out of stubs/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the memory-device stubs are needed exactly when the Kconfig symbols are not needed, move them to hw/mem/. Signed-off-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-15-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- stubs/memory_device.c => hw/mem/memory-device-stubs.c | 0 hw/mem/meson.build | 1 + stubs/meson.build | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename stubs/memory_device.c => hw/mem/memory-device-stubs.c (100%) diff --git a/stubs/memory_device.c b/hw/mem/memory-device-stubs.c similarity index 100% rename from stubs/memory_device.c rename to hw/mem/memory-device-stubs.c diff --git a/hw/mem/meson.build b/hw/mem/meson.build index faee1fe936..1c1c6da24b 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -6,6 +6,7 @@ mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) system_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c')) +system_ss.add(when: 'CONFIG_MEM_DEVICE', if_false: files('memory-device-stubs.c')) system_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) system_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c')) diff --git a/stubs/meson.build b/stubs/meson.build index 92887660e4..a4404e765a 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -31,7 +31,6 @@ stub_ss.add(files('monitor.c')) stub_ss.add(files('monitor-core.c')) stub_ss.add(files('physmem.c')) stub_ss.add(files('qemu-timer-notify-cb.c')) -stub_ss.add(files('memory_device.c')) stub_ss.add(files('qmp-command-available.c')) stub_ss.add(files('qmp-quit.c')) stub_ss.add(files('qtest.c')) From 857f504cf279a063e375c32e88d5cf3312d6b30c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:27 +0200 Subject: [PATCH 15/63] colo: move stubs out of stubs/ Since the colo stubs are needed exactly when the build options are not enabled, move them together with the code they stub. Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-16-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- stubs/colo.c => migration/colo-stubs.c | 0 migration/meson.build | 2 ++ stubs/colo-compare.c => net/colo-stubs.c | 0 net/meson.build | 2 ++ stubs/meson.build | 2 -- 5 files changed, 4 insertions(+), 2 deletions(-) rename stubs/colo.c => migration/colo-stubs.c (100%) rename stubs/colo-compare.c => net/colo-stubs.c (100%) diff --git a/stubs/colo.c b/migration/colo-stubs.c similarity index 100% rename from stubs/colo.c rename to migration/colo-stubs.c diff --git a/migration/meson.build b/migration/meson.build index 1eeb915ff6..f76b1ba328 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -34,6 +34,8 @@ system_ss.add(files( if get_option('replication').allowed() system_ss.add(files('colo-failover.c', 'colo.c')) +else + system_ss.add(files('colo-stubs.c')) endif system_ss.add(when: rdma, if_true: files('rdma.c')) diff --git a/stubs/colo-compare.c b/net/colo-stubs.c similarity index 100% rename from stubs/colo-compare.c rename to net/colo-stubs.c diff --git a/net/meson.build b/net/meson.build index 9432a588e4..e0cd71470e 100644 --- a/net/meson.build +++ b/net/meson.build @@ -20,6 +20,8 @@ if get_option('replication').allowed() or \ get_option('colo_proxy').allowed() system_ss.add(files('colo-compare.c')) system_ss.add(files('colo.c')) +else + system_ss.add(files('colo-stubs.c')) endif if get_option('colo_proxy').allowed() diff --git a/stubs/meson.build b/stubs/meson.build index a4404e765a..a252bffad0 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -42,8 +42,6 @@ stub_ss.add(files('target-get-monitor-def.c')) stub_ss.add(files('target-monitor-defs.c')) stub_ss.add(files('trace-control.c')) stub_ss.add(files('uuid.c')) -stub_ss.add(files('colo.c')) -stub_ss.add(files('colo-compare.c')) stub_ss.add(files('vmstate.c')) stub_ss.add(files('vm-stop.c')) stub_ss.add(files('win32-kbd-hook.c')) From 957eca9e73279453899193c053d1a47304e9aa58 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:28 +0200 Subject: [PATCH 16/63] stubs: split record/replay stubs further replay.c symbols are only needed by user mode emulation, with the exception of replay_mode that is needed by both user mode emulation (by way of qemu_guest_getrandom) and block layer tools (by way of util/qemu-timer.c). Since it is needed by libqemuutil rather than specific files that are part of the tools and emulators, split the replay_mode stub into its own file. Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-17-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- stubs/meson.build | 1 + stubs/replay-mode.c | 4 ++++ stubs/replay.c | 2 -- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 stubs/replay-mode.c diff --git a/stubs/meson.build b/stubs/meson.build index a252bffad0..4a524f5816 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -36,6 +36,7 @@ stub_ss.add(files('qmp-quit.c')) stub_ss.add(files('qtest.c')) stub_ss.add(files('ram-block.c')) stub_ss.add(files('replay.c')) +stub_ss.add(files('replay-mode.c')) stub_ss.add(files('runstate-check.c')) stub_ss.add(files('sysbus.c')) stub_ss.add(files('target-get-monitor-def.c')) diff --git a/stubs/replay-mode.c b/stubs/replay-mode.c new file mode 100644 index 0000000000..264be9d96c --- /dev/null +++ b/stubs/replay-mode.c @@ -0,0 +1,4 @@ +#include "qemu/osdep.h" +#include "sysemu/replay.h" + +ReplayMode replay_mode; diff --git a/stubs/replay.c b/stubs/replay.c index 42c92e4acb..b4dd6a566e 100644 --- a/stubs/replay.c +++ b/stubs/replay.c @@ -1,8 +1,6 @@ #include "qemu/osdep.h" #include "exec/replay-core.h" -ReplayMode replay_mode; - void replay_finish(void) { } From 3a15604900c4f433c970cc6294520a98f201287e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:29 +0200 Subject: [PATCH 17/63] stubs: include stubs only if needed Currently it is not documented anywhere why some functions need to be stubbed. Group the files in stubs/meson.build according to who needs them, both to reduce the size of the compilation and to clarify the use of stubs. Signed-off-by: Paolo Bonzini Message-ID: <20240408155330.522792-18-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- stubs/meson.build | 122 +++++++++++++++--------- stubs/{monitor.c => monitor-internal.c} | 0 2 files changed, 75 insertions(+), 47 deletions(-) rename stubs/{monitor.c => monitor-internal.c} (100%) diff --git a/stubs/meson.build b/stubs/meson.build index 4a524f5816..8ee1fd5753 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -1,58 +1,86 @@ -stub_ss.add(files('bdrv-next-monitor-owned.c')) -stub_ss.add(files('blk-commit-all.c')) -stub_ss.add(files('blk-exp-close-all.c')) -stub_ss.add(files('blockdev-close-all-bdrv-states.c')) -stub_ss.add(files('change-state-handler.c')) -stub_ss.add(files('cmos.c')) +# If possible, add new files to other directories, by using "if_false". +# If you need them here, try to add them under one of the if statements +# below, so that it is clear who needs the stubbed functionality. + stub_ss.add(files('cpu-get-clock.c')) -stub_ss.add(files('cpus-get-virtual-clock.c')) -stub_ss.add(files('qemu-timer-notify-cb.c')) -stub_ss.add(files('icount.c')) -stub_ss.add(files('dump.c')) -stub_ss.add(files('error-printf.c')) stub_ss.add(files('fdset.c')) -stub_ss.add(files('gdbstub.c')) -stub_ss.add(files('get-vm-name.c')) -stub_ss.add(files('graph-lock.c')) -stub_ss.add(files('hotplug-stubs.c')) -if linux_io_uring.found() - stub_ss.add(files('io_uring.c')) -endif stub_ss.add(files('iothread-lock.c')) -if have_block - stub_ss.add(files('iothread-lock-block.c')) -endif stub_ss.add(files('is-daemonized.c')) -if libaio.found() - stub_ss.add(files('linux-aio.c')) -endif -stub_ss.add(files('migr-blocker.c')) -stub_ss.add(files('monitor.c')) stub_ss.add(files('monitor-core.c')) -stub_ss.add(files('physmem.c')) -stub_ss.add(files('qemu-timer-notify-cb.c')) -stub_ss.add(files('qmp-command-available.c')) -stub_ss.add(files('qmp-quit.c')) -stub_ss.add(files('qtest.c')) -stub_ss.add(files('ram-block.c')) -stub_ss.add(files('replay.c')) stub_ss.add(files('replay-mode.c')) -stub_ss.add(files('runstate-check.c')) -stub_ss.add(files('sysbus.c')) -stub_ss.add(files('target-get-monitor-def.c')) -stub_ss.add(files('target-monitor-defs.c')) stub_ss.add(files('trace-control.c')) -stub_ss.add(files('uuid.c')) -stub_ss.add(files('vmstate.c')) -stub_ss.add(files('vm-stop.c')) -stub_ss.add(files('win32-kbd-hook.c')) -stub_ss.add(files('cpu-synchronize-state.c')) -if have_block or have_ga + +if have_block + stub_ss.add(files('bdrv-next-monitor-owned.c')) + stub_ss.add(files('blk-commit-all.c')) + stub_ss.add(files('blk-exp-close-all.c')) + stub_ss.add(files('blockdev-close-all-bdrv-states.c')) + stub_ss.add(files('change-state-handler.c')) + stub_ss.add(files('get-vm-name.c')) + stub_ss.add(files('iothread-lock-block.c')) + stub_ss.add(files('migr-blocker.c')) + stub_ss.add(files('physmem.c')) + stub_ss.add(files('ram-block.c')) stub_ss.add(files('replay-tools.c')) + stub_ss.add(files('runstate-check.c')) + stub_ss.add(files('uuid.c')) endif -if have_system - stub_ss.add(files('fw_cfg.c')) - stub_ss.add(files('xen-hw-stub.c')) -else + +if have_block or have_ga + # stubs for hooks in util/main-loop.c, util/async.c etc. + stub_ss.add(files('cpus-get-virtual-clock.c')) + stub_ss.add(files('icount.c')) + stub_ss.add(files('graph-lock.c')) + if linux_io_uring.found() + stub_ss.add(files('io_uring.c')) + endif + if libaio.found() + stub_ss.add(files('linux-aio.c')) + endif + stub_ss.add(files('qemu-timer-notify-cb.c')) + + # stubs for monitor + stub_ss.add(files('monitor-internal.c')) + stub_ss.add(files('qmp-command-available.c')) + stub_ss.add(files('qmp-quit.c')) +endif + +if have_block or have_user + stub_ss.add(files('qtest.c')) + stub_ss.add(files('vm-stop.c')) + stub_ss.add(files('vmstate.c')) + + # more symbols provided by the monitor + stub_ss.add(files('error-printf.c')) +endif + +if have_user + # Symbols that are used by hw/core. + stub_ss.add(files('cpu-synchronize-state.c')) stub_ss.add(files('qdev.c')) endif + +if have_system + # Symbols that are only needed in some configurations. Try not + # adding more of these. If the symbol is used in specific_ss, + # in particular, consider defining a preprocessor macro via + # Kconfig or configs/targets/. + stub_ss.add(files('dump.c')) + stub_ss.add(files('cmos.c')) + stub_ss.add(files('fw_cfg.c')) + stub_ss.add(files('target-get-monitor-def.c')) + stub_ss.add(files('target-monitor-defs.c')) + stub_ss.add(files('win32-kbd-hook.c')) + stub_ss.add(files('xen-hw-stub.c')) +endif + +if have_system or have_user + stub_ss.add(files('gdbstub.c')) + + # Also included in have_system for --disable-tcg builds + stub_ss.add(files('replay.c')) + + # Also included in have_system for tests/unit/test-qdev-global-props + stub_ss.add(files('hotplug-stubs.c')) + stub_ss.add(files('sysbus.c')) +endif diff --git a/stubs/monitor.c b/stubs/monitor-internal.c similarity index 100% rename from stubs/monitor.c rename to stubs/monitor-internal.c From 748e62dbf5065c7d166c827425c7797389b5f9fe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Apr 2024 17:53:30 +0200 Subject: [PATCH 18/63] stubs: move monitor_fdsets_cleanup with other fdset stubs Even though monitor_get_fd() has to remain separate because it is mocked by tests/unit/test-util-sockets, monitor_fdsets_cleanup() is logically part of the stubs for monitor/fds.c, so move it there. Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Message-ID: <20240408155330.522792-19-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- stubs/fdset.c | 6 ++++++ stubs/monitor-internal.c | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/stubs/fdset.c b/stubs/fdset.c index 56b3663d58..d7c39a28ac 100644 --- a/stubs/fdset.c +++ b/stubs/fdset.c @@ -1,5 +1,7 @@ #include "qemu/osdep.h" +#include "qapi/error.h" #include "monitor/monitor.h" +#include "../monitor/monitor-internal.h" int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags) { @@ -15,3 +17,7 @@ int64_t monitor_fdset_dup_fd_find(int dup_fd) void monitor_fdset_dup_fd_remove(int dupfd) { } + +void monitor_fdsets_cleanup(void) +{ +} diff --git a/stubs/monitor-internal.c b/stubs/monitor-internal.c index 20786ac4ff..4fece49d53 100644 --- a/stubs/monitor-internal.c +++ b/stubs/monitor-internal.c @@ -1,7 +1,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "monitor/monitor.h" -#include "../monitor/monitor-internal.h" int monitor_get_fd(Monitor *mon, const char *name, Error **errp) { @@ -12,7 +11,3 @@ int monitor_get_fd(Monitor *mon, const char *name, Error **errp) void monitor_init_hmp(Chardev *chr, bool use_readline, Error **errp) { } - -void monitor_fdsets_cleanup(void) -{ -} From ab75ecb79be3b856f63bef4c91aef0dc17d405cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Apr 2024 12:31:35 +0200 Subject: [PATCH 19/63] vga: optimize computation of dirty memory region The depth == 0 and depth == 15 have to be special cased because width * depth / 8 does not provide the correct scanline length. However, thanks to the recent reorganization of vga_draw_graphic() the correct value of VRAM bits per pixel is available in "bits". Use it (via the same "bwidth" computation that is used later in the function), thus restricting the slow path to the wraparound case. Signed-off-by: Paolo Bonzini --- hw/display/vga.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/hw/display/vga.c b/hw/display/vga.c index 77f59e8c11..77d709a3d6 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -1574,22 +1574,16 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) /* Horizontal pel panning bit 3 is only used in text mode. */ hpel = bits <= 8 ? s->params.hpel & 7 : 0; + bwidth = DIV_ROUND_UP(width * bits, 8); /* scanline length */ + if (hpel) { + bwidth += 4; + } region_start = (s->params.start_addr * 4); - region_end = region_start + (ram_addr_t)s->params.line_offset * height; - region_end += width * depth / 8; /* scanline length */ - region_end -= s->params.line_offset; - if (hpel) { - region_end += 4; - } - if (region_end > s->vbe_size || depth == 0 || depth == 15) { + region_end = region_start + (ram_addr_t)s->params.line_offset * (height - 1) + bwidth; + if (region_end > s->vbe_size) { /* - * We land here on: - * - wraps around (can happen with cirrus vbe modes) - * - depth == 0 (256 color palette video mode) - * - depth == 15 - * - * Take the safe and slow route: + * On wrap around take the safe and slow route: * - create a dirty bitmap snapshot for all vga memory. * - force shadowing (so all vga memory access goes * through vga_read_*() helpers). @@ -1667,10 +1661,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) s->params.line_compare, sr(s, VGA_SEQ_CLOCK_MODE)); #endif addr1 = (s->params.start_addr * 4); - bwidth = DIV_ROUND_UP(width * bits, 8); - if (hpel) { - bwidth += 4; - } y_start = -1; d = surface_data(surface); linesize = surface_stride(surface); From f89761d349999a3d898b35fd85737cf25114d44a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Apr 2024 13:27:59 +0200 Subject: [PATCH 20/63] vga: move dirty memory region code together Take into account split screen mode close to wrap around, which is the other special case for dirty memory region computation. Signed-off-by: Paolo Bonzini --- hw/display/vga.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/display/vga.c b/hw/display/vga.c index 77d709a3d6..e91a76bf76 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -1596,6 +1596,10 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) region_end = s->vbe_size; force_shadow = true; } + if (s->params.line_compare < height) { + /* split screen mode */ + region_start = 0; + } /* * Check whether we can share the surface with the backend @@ -1667,10 +1671,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) y1 = 0; if (!full_update) { - if (s->params.line_compare < height) { - /* split screen mode */ - region_start = 0; - } snap = memory_region_snapshot_and_clear_dirty(&s->vram, region_start, region_end - region_start, DIRTY_MEMORY_VGA); From 1e1e48792a92652af05f59b4df2f643542c71d90 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Mar 2024 19:29:07 +0100 Subject: [PATCH 21/63] kvm: use configs/ definition to conditionalize debug support If an architecture adds support for KVM_CAP_SET_GUEST_DEBUG but QEMU does not have the necessary code, QEMU will fail to build after updating kernel headers. Avoid this by using a #define in config-target.h instead of KVM_CAP_SET_GUEST_DEBUG. Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-accel-ops.c | 4 ++-- accel/kvm/kvm-all.c | 10 +++++----- configs/targets/aarch64-softmmu.mak | 1 + configs/targets/i386-softmmu.mak | 1 + configs/targets/ppc-softmmu.mak | 1 + configs/targets/ppc64-softmmu.mak | 1 + configs/targets/s390x-softmmu.mak | 1 + configs/targets/x86_64-softmmu.mak | 1 + include/sysemu/kvm.h | 2 +- include/sysemu/kvm_int.h | 2 +- 10 files changed, 15 insertions(+), 9 deletions(-) diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index b3c946dc4b..f5ac643fca 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -85,7 +85,7 @@ static bool kvm_cpus_are_resettable(void) return !kvm_enabled() || kvm_cpu_check_are_resettable(); } -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG static int kvm_update_guest_debug_ops(CPUState *cpu) { return kvm_update_guest_debug(cpu, 0); @@ -104,7 +104,7 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, void *data) ops->synchronize_state = kvm_cpu_synchronize_state; ops->synchronize_pre_loadvm = kvm_cpu_synchronize_pre_loadvm; -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG ops->update_guest_debug = kvm_update_guest_debug_ops; ops->supports_guest_debug = kvm_supports_guest_debug; ops->insert_breakpoint = kvm_insert_breakpoint; diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 931f74256e..d58916e33a 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2360,7 +2360,7 @@ static int kvm_init(MachineState *ms) s->sigmask_len = 8; accel_blocker_init(); -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG QTAILQ_INIT(&s->kvm_sw_breakpoints); #endif QLIST_INIT(&s->kvm_parked_vcpus); @@ -2544,7 +2544,7 @@ static int kvm_init(MachineState *ms) kvm_vm_attributes_allowed = (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0); -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG kvm_has_guest_debug = (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); #endif @@ -2553,7 +2553,7 @@ static int kvm_init(MachineState *ms) if (kvm_has_guest_debug) { kvm_sstep_flags = SSTEP_ENABLE; -#if defined KVM_CAP_SET_GUEST_DEBUG2 +#if defined TARGET_KVM_HAVE_GUEST_DEBUG int guest_debug_flags = kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG2); @@ -3157,7 +3157,7 @@ bool kvm_arm_supports_user_irq(void) return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ); } -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, vaddr pc) { struct kvm_sw_breakpoint *bp; @@ -3317,7 +3317,7 @@ void kvm_remove_all_breakpoints(CPUState *cpu) } } -#endif /* !KVM_CAP_SET_GUEST_DEBUG */ +#endif /* !TARGET_KVM_HAVE_GUEST_DEBUG */ static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) { diff --git a/configs/targets/aarch64-softmmu.mak b/configs/targets/aarch64-softmmu.mak index b4338e9568..83c22391a6 100644 --- a/configs/targets/aarch64-softmmu.mak +++ b/configs/targets/aarch64-softmmu.mak @@ -1,5 +1,6 @@ TARGET_ARCH=aarch64 TARGET_BASE_ARCH=arm TARGET_SUPPORTS_MTTCG=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml gdb-xml/aarch64-pauth.xml TARGET_NEED_FDT=y diff --git a/configs/targets/i386-softmmu.mak b/configs/targets/i386-softmmu.mak index 6b3c99fc86..d61b507613 100644 --- a/configs/targets/i386-softmmu.mak +++ b/configs/targets/i386-softmmu.mak @@ -1,4 +1,5 @@ TARGET_ARCH=i386 TARGET_SUPPORTS_MTTCG=y TARGET_NEED_FDT=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/i386-32bit.xml diff --git a/configs/targets/ppc-softmmu.mak b/configs/targets/ppc-softmmu.mak index 774440108f..f3ea9c98f7 100644 --- a/configs/targets/ppc-softmmu.mak +++ b/configs/targets/ppc-softmmu.mak @@ -1,4 +1,5 @@ TARGET_ARCH=ppc TARGET_BIG_ENDIAN=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/power-core.xml gdb-xml/power-fpu.xml gdb-xml/power-altivec.xml gdb-xml/power-spe.xml TARGET_NEED_FDT=y diff --git a/configs/targets/ppc64-softmmu.mak b/configs/targets/ppc64-softmmu.mak index ddf0c39617..1db8d8381d 100644 --- a/configs/targets/ppc64-softmmu.mak +++ b/configs/targets/ppc64-softmmu.mak @@ -2,5 +2,6 @@ TARGET_ARCH=ppc64 TARGET_BASE_ARCH=ppc TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/power64-core.xml gdb-xml/power-fpu.xml gdb-xml/power-altivec.xml gdb-xml/power-spe.xml gdb-xml/power-vsx.xml TARGET_NEED_FDT=y diff --git a/configs/targets/s390x-softmmu.mak b/configs/targets/s390x-softmmu.mak index 70d2f9f0ba..b22218aacc 100644 --- a/configs/targets/s390x-softmmu.mak +++ b/configs/targets/s390x-softmmu.mak @@ -1,4 +1,5 @@ TARGET_ARCH=s390x TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/configs/targets/x86_64-softmmu.mak b/configs/targets/x86_64-softmmu.mak index 197817c943..c5f882e5ba 100644 --- a/configs/targets/x86_64-softmmu.mak +++ b/configs/targets/x86_64-softmmu.mak @@ -2,4 +2,5 @@ TARGET_ARCH=x86_64 TARGET_BASE_ARCH=i386 TARGET_SUPPORTS_MTTCG=y TARGET_NEED_FDT=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/i386-64bit.xml diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index fad9a7e8ff..2cba899270 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -224,7 +224,7 @@ void kvm_flush_coalesced_mmio_buffer(void); * calling down to kvm_arch_update_guest_debug after the generic * fields have been set. */ -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); #else static inline int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 882e37e12c..94488d2c1a 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -78,7 +78,7 @@ struct KVMState struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; bool coalesced_flush_in_progress; int vcpu_events; -#ifdef KVM_CAP_SET_GUEST_DEBUG +#ifdef TARGET_KVM_HAVE_GUEST_DEBUG QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; #endif int max_nested_state_len; From 85fa9acda887438feb0711970bb528959a37568e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Mar 2024 15:01:51 +0100 Subject: [PATCH 22/63] hw: Add compat machines for 9.1 Add 9.1 machine types for arm/i440fx/m68k/q35/s390x/spapr. Reviewed-by: Cornelia Huck Acked-by: Thomas Huth Reviewed-by: Harsh Prateek Bora Reviewed-by: Zhao Liu Cc: Gavin Shan Signed-off-by: Paolo Bonzini --- hw/arm/virt.c | 11 +++++++++-- hw/core/machine.c | 3 +++ hw/i386/pc.c | 3 +++ hw/i386/pc_piix.c | 17 ++++++++++++++--- hw/i386/pc_q35.c | 14 ++++++++++++-- hw/m68k/virt.c | 11 +++++++++-- hw/ppc/spapr.c | 17 ++++++++++++++--- hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- include/hw/boards.h | 3 +++ include/hw/i386/pc.h | 3 +++ 10 files changed, 83 insertions(+), 13 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a9a913aead..c9119ef384 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3223,10 +3223,17 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); -static void virt_machine_9_0_options(MachineClass *mc) +static void virt_machine_9_1_options(MachineClass *mc) { } -DEFINE_VIRT_MACHINE_AS_LATEST(9, 0) +DEFINE_VIRT_MACHINE_AS_LATEST(9, 1) + +static void virt_machine_9_0_options(MachineClass *mc) +{ + virt_machine_9_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); +} +DEFINE_VIRT_MACHINE(9, 0) static void virt_machine_8_2_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index 37ede0e7d4..a92bec2314 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -33,6 +33,9 @@ #include "hw/virtio/virtio-iommu.h" #include "audio/audio.h" +GlobalProperty hw_compat_9_0[] = {}; +const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); + GlobalProperty hw_compat_8_2[] = { { "migration", "zero-page-detection", "legacy"}, { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 5c21b0c4db..b0d818b209 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -78,6 +78,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, +GlobalProperty pc_compat_9_0[] = {}; +const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); + GlobalProperty pc_compat_8_2[] = {}; const size_t pc_compat_8_2_len = G_N_ELEMENTS(pc_compat_8_2); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 18ba076609..8850c49c66 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -513,13 +513,26 @@ static void pc_i440fx_machine_options(MachineClass *m) "Use a different south bridge than PIIX3"); } -static void pc_i440fx_9_0_machine_options(MachineClass *m) +static void pc_i440fx_9_1_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = true; } +DEFINE_I440FX_MACHINE(v9_1, "pc-i440fx-9.1", NULL, + pc_i440fx_9_1_machine_options); + +static void pc_i440fx_9_0_machine_options(MachineClass *m) +{ + pc_i440fx_9_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + + compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); + compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); +} + DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0", NULL, pc_i440fx_9_0_machine_options); @@ -528,8 +541,6 @@ static void pc_i440fx_8_2_machine_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_9_0_machine_options(m); - m->alias = NULL; - m->is_default = false; compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c7bc8a2041..6e1180d4b6 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -365,12 +365,23 @@ static void pc_q35_machine_options(MachineClass *m) pc_q35_compat_defaults, pc_q35_compat_defaults_len); } -static void pc_q35_9_0_machine_options(MachineClass *m) +static void pc_q35_9_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v9_1, "pc-q35-9.1", NULL, + pc_q35_9_1_machine_options); + +static void pc_q35_9_0_machine_options(MachineClass *m) +{ + pc_q35_9_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); + compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); +} + DEFINE_Q35_MACHINE(v9_0, "pc-q35-9.0", NULL, pc_q35_9_0_machine_options); @@ -378,7 +389,6 @@ static void pc_q35_8_2_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_9_0_machine_options(m); - m->alias = NULL; m->max_cpus = 1024; compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index b8e5e102e6..09bc9bdfef 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -357,10 +357,17 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); -static void virt_machine_9_0_options(MachineClass *mc) +static void virt_machine_9_1_options(MachineClass *mc) { } -DEFINE_VIRT_MACHINE(9, 0, true) +DEFINE_VIRT_MACHINE(9, 1, true) + +static void virt_machine_9_0_options(MachineClass *mc) +{ + virt_machine_9_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); +} +DEFINE_VIRT_MACHINE(9, 0, false) static void virt_machine_8_2_options(MachineClass *mc) { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e9bc97fee0..36ada4d0ba 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4806,14 +4806,25 @@ static void spapr_machine_latest_class_options(MachineClass *mc) type_init(spapr_machine_register_##suffix) /* - * pseries-9.0 + * pseries-9.1 */ -static void spapr_machine_9_0_class_options(MachineClass *mc) +static void spapr_machine_9_1_class_options(MachineClass *mc) { /* Defaults for the latest behaviour inherited from the base class */ } -DEFINE_SPAPR_MACHINE(9_0, "9.0", true); +DEFINE_SPAPR_MACHINE(9_1, "9.1", true); + +/* + * pseries-9.0 + */ +static void spapr_machine_9_0_class_options(MachineClass *mc) +{ + spapr_machine_9_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); +} + +DEFINE_SPAPR_MACHINE(9_0, "9.0", false); /* * pseries-8.2 diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index b1dcb3857f..7cebbf6c02 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -859,14 +859,26 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_9_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_9_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(9_1, "9.1", true); + static void ccw_machine_9_0_instance_options(MachineState *machine) { + ccw_machine_9_1_instance_options(machine); } static void ccw_machine_9_0_class_options(MachineClass *mc) { + ccw_machine_9_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } -DEFINE_CCW_MACHINE(9_0, "9.0", true); +DEFINE_CCW_MACHINE(9_0, "9.0", false); static void ccw_machine_8_2_instance_options(MachineState *machine) { diff --git a/include/hw/boards.h b/include/hw/boards.h index 8b8f6d5c00..50e0cf4278 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -425,6 +425,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_9_0[]; +extern const size_t hw_compat_9_0_len; + extern GlobalProperty hw_compat_8_2[]; extern const size_t hw_compat_8_2_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 27a68071d7..349f79df08 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -198,6 +198,9 @@ void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size); /* sgx.c */ void pc_machine_init_sgx_epc(PCMachineState *pcms); +extern GlobalProperty pc_compat_9_0[]; +extern const size_t pc_compat_9_0_len; + extern GlobalProperty pc_compat_8_2[]; extern const size_t pc_compat_8_2_len; From 513ba32dccc659c80722b3a43233b26eaa50309a Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 18 Mar 2024 16:53:36 +0100 Subject: [PATCH 23/63] target/i386: add guest-phys-bits cpu property Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) via -cpu $model,guest-phys-bits=$nr. Signed-off-by: Gerd Hoffmann Message-ID: <20240318155336.156197-3-kraxel@redhat.com> Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 4 +++- target/i386/cpu.c | 22 ++++++++++++++++++++++ target/i386/cpu.h | 8 ++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index b0d818b209..41909f7bd7 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -78,7 +78,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, -GlobalProperty pc_compat_9_0[] = {}; +GlobalProperty pc_compat_9_0[] = { + { TYPE_X86_CPU, "guest-phys-bits", "0" }, +}; const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); GlobalProperty pc_compat_8_2[] = {}; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 33760a2ee1..eef3d08473 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { /* 64 bit processor */ *eax |= (cpu_x86_virtual_addr_width(env) << 8); + *eax |= (cpu->guest_phys_bits << 16); } *ebx = env->features[FEAT_8000_0008_EBX]; if (cs->nr_cores * cs->nr_threads > 1) { @@ -7329,6 +7330,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } + if (cpu->guest_phys_bits == -1) { + /* + * If it was not set by the user, or by the accelerator via + * cpu_exec_realizefn, clear. + */ + cpu->guest_phys_bits = 0; + } + if (cpu->ucode_rev == 0) { /* * The default is the same as KVM's. Note that this check @@ -7379,6 +7388,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) if (cpu->phys_bits == 0) { cpu->phys_bits = TCG_PHYS_ADDR_BITS; } + if (cpu->guest_phys_bits && + (cpu->guest_phys_bits > cpu->phys_bits || + cpu->guest_phys_bits < 32)) { + error_setg(errp, "guest-phys-bits should be between 32 and %u " + " (but is %u)", + cpu->phys_bits, cpu->guest_phys_bits); + return; + } } else { /* For 32 bit systems don't use the user set value, but keep * phys_bits consistent with what we tell the guest. @@ -7387,6 +7404,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) error_setg(errp, "phys-bits is not user-configurable in 32 bit"); return; } + if (cpu->guest_phys_bits != 0) { + error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); + return; + } if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { cpu->phys_bits = 36; @@ -7887,6 +7908,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6b05738079..6112e27bfd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2027,6 +2027,14 @@ struct ArchCPU { /* Number of physical address bits supported */ uint32_t phys_bits; + /* + * Number of guest physical address bits available. Usually this is + * identical to host physical address bits. With NPT or EPT 4-level + * paging, guest physical address space might be restricted to 48 bits + * even if the host cpu supports more physical address bits. + */ + uint32_t guest_phys_bits; + /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; From 0d08c423688edcca857f88dab20f1fc56de2b281 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 18 Mar 2024 16:53:35 +0100 Subject: [PATCH 24/63] kvm: add support for guest physical bits Query kvm for supported guest physical address bits, in cpuid function 80000008, eax[23:16]. Usually this is identical to host physical address bits. With NPT or EPT being used this might be restricted to 48 (max 4-level paging address space size) even if the host cpu supports more physical address bits. When set pass this to the guest, using cpuid too. Guest firmware can use this to figure how big the usable guest physical address space is, so PCI bar mapping are actually reachable. Signed-off-by: Gerd Hoffmann Reviewed-by: Xiaoyao Li Reviewed-by: Zhao Liu Message-ID: <20240318155336.156197-2-kraxel@redhat.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 9c791b7b05..f76972e47e 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -18,10 +18,32 @@ #include "kvm_i386.h" #include "hw/core/accel-cpu.h" +static void kvm_set_guest_phys_bits(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + uint32_t eax, guest_phys_bits; + + eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); + guest_phys_bits = (eax >> 16) & 0xff; + if (!guest_phys_bits) { + return; + } + cpu->guest_phys_bits = guest_phys_bits; + if (cpu->guest_phys_bits > cpu->phys_bits) { + cpu->guest_phys_bits = cpu->phys_bits; + } + + if (cpu->host_phys_bits && cpu->host_phys_bits_limit && + cpu->guest_phys_bits > cpu->host_phys_bits_limit) { + cpu->guest_phys_bits = cpu->host_phys_bits_limit; + } +} + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + bool ret; /* * The realize order is important, since x86_cpu_realize() checks if @@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) * * realize order: * - * x86_cpu_realize(): - * -> x86_cpu_expand_features() - * -> cpu_exec_realizefn(): - * -> accel_cpu_common_realize() - * kvm_cpu_realizefn() -> host_cpu_realizefn() - * -> cpu_common_realizefn() - * -> check/update ucode_rev, phys_bits, mwait + * x86_cpu_realizefn(): + * x86_cpu_expand_features() + * cpu_exec_realizefn(): + * accel_cpu_common_realize() + * kvm_cpu_realizefn() + * host_cpu_realizefn() + * kvm_set_guest_phys_bits() + * check/update ucode_rev, phys_bits, guest_phys_bits, mwait + * cpu_common_realizefn() (via xcc->parent_realize) */ if (cpu->max_features) { if (enable_cpu_pm && kvm_has_waitpkg()) { @@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) MSR_IA32_UCODE_REV); } } - return host_cpu_realizefn(cs, errp); + ret = host_cpu_realizefn(cs, errp); + if (!ret) { + return ret; + } + + if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && + cpu->guest_phys_bits == -1) { + kvm_set_guest_phys_bits(cs); + } + + return true; } static bool lmce_supported(void) From a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 29 Feb 2024 01:36:43 -0500 Subject: [PATCH 25/63] i386/kvm: Move architectural CPUID leaf generation to separate helper Move the architectural (for lack of a better term) CPUID leaf generation to a separate helper so that the generation code can be reused by TDX, which needs to generate a canonical VM-scoped configuration. For now this is just a cleanup, so keep the function static. Signed-off-by: Sean Christopherson Signed-off-by: Xiaoyao Li Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com> Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 449 +++++++++++++++++++++--------------------- 1 file changed, 227 insertions(+), 222 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index e68cbe9293..f1b59011d1 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1706,6 +1706,231 @@ static void kvm_init_nested_state(CPUX86State *env) } } +static uint32_t kvm_x86_build_cpuid(CPUX86State *env, + struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) +{ + uint32_t limit, i, j; + uint32_t unused; + struct kvm_cpuid_entry2 *c; + + cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); + + for (i = 0; i <= limit; i++) { + j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + switch (i) { + case 2: { + /* Keep reading function 2 till all the input is received */ + int times; + + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | + KVM_CPUID_FLAG_STATE_READ_NEXT; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + times = c->eax & 0xff; + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + } + break; + } + case 0x1f: + if (env->nr_dies < 2) { + cpuid_i--; + break; + } + /* fallthrough */ + case 4: + case 0xb: + case 0xd: + for (j = 0; ; j++) { + if (i == 0xd && j == 64) { + break; + } + + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + + if (i == 4 && c->eax == 0) { + break; + } + if (i == 0xb && !(c->ecx & 0xff00)) { + break; + } + if (i == 0x1f && !(c->ecx & 0xff00)) { + break; + } + if (i == 0xd && c->eax == 0) { + continue; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + } + break; + case 0x12: + for (j = 0; ; j++) { + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + + if (j > 1 && (c->eax & 0xf) != 1) { + break; + } + + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + } + break; + case 0x7: + case 0x14: + case 0x1d: + case 0x1e: { + uint32_t times; + + c->function = i; + c->index = 0; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + times = c->eax; + + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + c->function = i; + c->index = j; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + } + break; + } + default: + c->function = i; + c->flags = 0; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + if (!c->eax && !c->ebx && !c->ecx && !c->edx) { + /* + * KVM already returns all zeroes if a CPUID entry is missing, + * so we can omit it and avoid hitting KVM's 80-entry limit. + */ + cpuid_i--; + } + break; + } + } + + if (limit >= 0x0a) { + uint32_t eax, edx; + + cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx); + + has_architectural_pmu_version = eax & 0xff; + if (has_architectural_pmu_version > 0) { + num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8; + + /* Shouldn't be more than 32, since that's the number of bits + * available in EBX to tell us _which_ counters are available. + * Play it safe. + */ + if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) { + num_architectural_pmu_gp_counters = MAX_GP_COUNTERS; + } + + if (has_architectural_pmu_version > 1) { + num_architectural_pmu_fixed_counters = edx & 0x1f; + + if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) { + num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS; + } + } + } + } + + cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0x80000000; i <= limit; i++) { + j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + + switch (i) { + case 0x8000001d: + /* Query for all AMD cache information leaves */ + for (j = 0; ; j++) { + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + + if (c->eax == 0) { + break; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + } + break; + default: + c->function = i; + c->flags = 0; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + if (!c->eax && !c->ebx && !c->ecx && !c->edx) { + /* + * KVM already returns all zeroes if a CPUID entry is missing, + * so we can omit it and avoid hitting KVM's 80-entry limit. + */ + cpuid_i--; + } + break; + } + } + + /* Call Centaur's CPUID instructions they are supported. */ + if (env->cpuid_xlevel2 > 0) { + cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0xC0000000; i <= limit; i++) { + j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + goto full; + } + c = &entries[cpuid_i++]; + + c->function = i; + c->flags = 0; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + } + } + + return cpuid_i; + +full: + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -1722,8 +1947,7 @@ int kvm_arch_init_vcpu(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - uint32_t limit, i, j, cpuid_i; - uint32_t unused; + uint32_t cpuid_i; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; int kvm_base = KVM_CPUID_SIGNATURE; @@ -1876,8 +2100,6 @@ int kvm_arch_init_vcpu(CPUState *cs) c->edx = env->features[FEAT_KVM_HINTS]; } - cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); - if (cpu->kvm_pv_enforce_cpuid) { r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); if (r < 0) { @@ -1888,224 +2110,7 @@ int kvm_arch_init_vcpu(CPUState *cs) } } - for (i = 0; i <= limit; i++) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported level value: 0x%x\n", limit); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - - switch (i) { - case 2: { - /* Keep reading function 2 till all the input is received */ - int times; - - c->function = i; - c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | - KVM_CPUID_FLAG_STATE_READ_NEXT; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - times = c->eax & 0xff; - - for (j = 1; j < times; ++j) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:2):eax & 0xf = 0x%x\n", times); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - c->function = i; - c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - } - break; - } - case 0x1f: - if (env->nr_dies < 2) { - cpuid_i--; - break; - } - /* fallthrough */ - case 4: - case 0xb: - case 0xd: - for (j = 0; ; j++) { - if (i == 0xd && j == 64) { - break; - } - - c->function = i; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - c->index = j; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - - if (i == 4 && c->eax == 0) { - break; - } - if (i == 0xb && !(c->ecx & 0xff00)) { - break; - } - if (i == 0x1f && !(c->ecx & 0xff00)) { - break; - } - if (i == 0xd && c->eax == 0) { - continue; - } - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - } - break; - case 0x12: - for (j = 0; ; j++) { - c->function = i; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - c->index = j; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - - if (j > 1 && (c->eax & 0xf) != 1) { - break; - } - - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x12,ecx:0x%x)\n", j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - } - break; - case 0x7: - case 0x14: - case 0x1d: - case 0x1e: { - uint32_t times; - - c->function = i; - c->index = 0; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - times = c->eax; - - for (j = 1; j <= times; ++j) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - c->function = i; - c->index = j; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - } - break; - } - default: - c->function = i; - c->flags = 0; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - if (!c->eax && !c->ebx && !c->ecx && !c->edx) { - /* - * KVM already returns all zeroes if a CPUID entry is missing, - * so we can omit it and avoid hitting KVM's 80-entry limit. - */ - cpuid_i--; - } - break; - } - } - - if (limit >= 0x0a) { - uint32_t eax, edx; - - cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx); - - has_architectural_pmu_version = eax & 0xff; - if (has_architectural_pmu_version > 0) { - num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8; - - /* Shouldn't be more than 32, since that's the number of bits - * available in EBX to tell us _which_ counters are available. - * Play it safe. - */ - if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) { - num_architectural_pmu_gp_counters = MAX_GP_COUNTERS; - } - - if (has_architectural_pmu_version > 1) { - num_architectural_pmu_fixed_counters = edx & 0x1f; - - if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) { - num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS; - } - } - } - } - - cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); - - for (i = 0x80000000; i <= limit; i++) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - - switch (i) { - case 0x8000001d: - /* Query for all AMD cache information leaves */ - for (j = 0; ; j++) { - c->function = i; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - c->index = j; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - - if (c->eax == 0) { - break; - } - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - } - break; - default: - c->function = i; - c->flags = 0; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - if (!c->eax && !c->ebx && !c->ecx && !c->edx) { - /* - * KVM already returns all zeroes if a CPUID entry is missing, - * so we can omit it and avoid hitting KVM's 80-entry limit. - */ - cpuid_i--; - } - break; - } - } - - /* Call Centaur's CPUID instructions they are supported. */ - if (env->cpuid_xlevel2 > 0) { - cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); - - for (i = 0xC0000000; i <= limit; i++) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - - c->function = i; - c->flags = 0; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - } - } - + cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; if (((env->cpuid_version >> 8)&0xF) >= 6 From c895fa54e3060c5ac6f3888dce96c9b78626072b Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 20 Mar 2024 17:31:38 +0800 Subject: [PATCH 26/63] target/i386: Introduce Icelake-Server-v7 to enable TSX When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, QEMU reports below warning: "warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no bit. It's meaningless that TSX isn't supported but still claim TSX is secure. So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 triggers the warning. Fix it by introducing a new version Icelake-Server-v7 which has both TSX and taa-no features. Then guest can use TSX securely when it see taa-no. This matches the production Icelake which supports TSX and isn't susceptible to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no. Ideally, TSX should have being enabled together with taa-no since v3, but for compatibility, we'd better to add v7 to enable it. Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") Tested-by: Xiangfei Ma Signed-off-by: Zhenzhong Duan Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index eef3d08473..3f4b121468 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3822,6 +3822,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 7, + .note = "TSX, taa-no", + .props = (PropValue[]) { + /* Restore TSX features removed by -v2 above */ + { "hle", "on" }, + { "rtm", "on" }, + { /* end of list */ } + }, + }, { /* end of list */ } } }, From 6e82d3b6220777667968a04c87e1667f164ebe88 Mon Sep 17 00:00:00 2001 From: Tao Su Date: Wed, 20 Mar 2024 10:10:44 +0800 Subject: [PATCH 27/63] target/i386: Add new CPU model SierraForest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to table 1-2 in Intel Architecture Instruction Set Extensions and Future Features (rev 051) [1], SierraForest has the following new features which have already been virtualized: - CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] - AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] - AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] - AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] Add above features to new CPU model SierraForest. Comparing with GraniteRapids CPU model, SierraForest bare-metal removes the following features: - HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] - RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] - AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] - AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] - AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] - AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] - AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] - AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] - AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] - AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] - AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] - AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] - AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] - LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] - TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] - AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] - AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] - AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] - AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] - AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] - fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] - fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] - AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] - PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] - XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] - EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] Add all features of GraniteRapids CPU model except above features to SierraForest CPU model. SierraForest doesn’t support TSX and RTM but supports TAA_NO. When RTM is not enabled in host, KVM will not report TAA_NO. So, just don't include TAA_NO in SierraForest CPU model. [1] https://cdrdv2.intel.com/v1/dl/getContent/671368 Reviewed-by: Zhao Liu Reviewed-by: Xiaoyao Li Signed-off-by: Tao Su Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3f4b121468..c295491d8a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -4109,6 +4109,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ }, }, }, + { + .name = "SierraForest", + .level = 0x23, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 175, + .stepping = 0, + /* + * please keep the ascending order so that we can have a clear view of + * bit position of each feature. + */ + .features[FEAT_1_EDX] = + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + .features[FEAT_1_ECX] = + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_WBNOINVD, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SHA_NI, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | + MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | + MSR_ARCH_CAP_PBRSB_NO, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | + CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, + .features[FEAT_7_1_EDX] = + CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, + .features[FEAT_7_2_EDX] = + CPUID_7_2_EDX_MCDT_NO, + .features[FEAT_VMX_BASIC] = + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = + MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | + MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | + VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = + VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | + VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | + VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | + VMX_SECONDARY_EXEC_XSAVES, + .features[FEAT_VMX_VMFUNC] = + MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (SierraForest)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { /* end of list */ }, + }, + }, { .name = "Denverton", .level = 21, From 41bdd9812863c150284a9339a048ed88c40f4df7 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 13 Mar 2024 07:53:23 -0700 Subject: [PATCH 28/63] target/i386: Export RFDS bit to guests Register File Data Sampling (RFDS) is a CPU side-channel vulnerability that may expose stale register value. CPUs that set RFDS_NO bit in MSR IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has the microcode to help mitigate RFDS. Make RFDS_CLEAR and RFDS_NO bits available to guests. Signed-off-by: Pawan Gupta Reviewed-by: Xiaoyao Li Reviewed-by: Zhao Liu Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c295491d8a..eda15b0d4c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", NULL, "fb-clear", NULL, NULL, NULL, NULL, NULL, NULL, - "pbrsb-no", NULL, "gds-no", NULL, - NULL, NULL, NULL, NULL, + "pbrsb-no", NULL, "gds-no", "rfds-no", + "rfds-clear", NULL, NULL, NULL, }, .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, From 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 20 Mar 2024 03:39:13 -0500 Subject: [PATCH 29/63] pci-host/q35: Move PAM initialization above SMRAM initialization In mch_realize(), process PAM initialization before SMRAM initialization so that later patch can skill all the SMRAM related with a single check. Signed-off-by: Isaku Yamahata Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-18-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 0d7d4e3f08..98d4a7c253 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp) /* setup pci memory mapping */ pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + /* PAM */ + init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, + PAM_BIOS_BASE, PAM_BIOS_SIZE); + for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { + init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, + PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, @@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp) object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram)); - - init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, - mch->system_memory, mch->pci_address_space, - PAM_BIOS_BASE, PAM_BIOS_SIZE); - for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { - init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, - mch->system_memory, mch->pci_address_space, - PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); - } } uint64_t mch_mcfg_base(void) From b07bf7b73fd02d24a7baa64a580f4974b86bbc86 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 20 Mar 2024 03:39:14 -0500 Subject: [PATCH 30/63] q35: Introduce smm_ranges property for q35-pci-host Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, etc... exist for the target platform. TDX doesn't support SMM and doesn't play nice with QEMU modifying related guest memory ranges. Signed-off-by: Isaku Yamahata Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-19-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc_q35.c | 2 ++ hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------ include/hw/i386/pc.h | 1 + include/hw/pci-host/q35.h | 1 + 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 6e1180d4b6..bb53a51ac1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine) x86ms->above_4g_mem_size, NULL); object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, pcms->default_bus_bypass_iommu, NULL); + object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, + x86_machine_is_smm_enabled(x86ms), NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 98d4a7c253..0b6cbaed7e 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -179,6 +179,8 @@ static Property q35_host_props[] = { mch.below_4g_mem_size, 0), DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, mch.above_4g_mem_size, 0), + DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, + mch.has_smm_ranges, true), DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; @@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) /* mch's object_initialize resets the default value, set it again */ qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", q35_host_get_pci_hole_start, NULL, NULL, NULL); @@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d, mch_update_pciexbar(mch); } + if (!mch->has_smm_ranges) { + return; + } + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, MCH_HOST_BRIDGE_SMRAM_SIZE)) { mch_update_smram(mch); @@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, static void mch_update(MCHPCIState *mch) { mch_update_pciexbar(mch); + mch_update_pam(mch); - mch_update_smram(mch); - mch_update_ext_tseg_mbytes(mch); - mch_update_smbase_smram(mch); + if (mch->has_smm_ranges) { + mch_update_smram(mch); + mch_update_ext_tseg_mbytes(mch); + mch_update_smbase_smram(mch); + } /* * pci hole goes from end-of-low-ram to io-apic. @@ -538,19 +548,21 @@ static void mch_reset(DeviceState *qdev) pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); - d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; - d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; - d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; - d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + if (mch->has_smm_ranges) { + d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; + d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; + d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; + d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; - if (mch->ext_tseg_mbytes > 0) { - pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, - MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); + if (mch->ext_tseg_mbytes > 0) { + pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); + } + + d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; + d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; } - d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; - d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; - mch_update(mch); } @@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp) PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } + if (!mch->has_smm_ranges) { + return; + } + /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 349f79df08..e52290916c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -161,6 +161,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" +#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" void pc_pci_as_mapping_init(MemoryRegion *system_memory, diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index bafcbe6752..22fadfa3ed 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -50,6 +50,7 @@ struct MCHPCIState { MemoryRegion tseg_blackhole, tseg_window; MemoryRegion smbase_blackhole, smbase_window; bool has_smram_at_smbase; + bool has_smm_ranges; Range pci_hole; uint64_t below_4g_mem_size; uint64_t above_4g_mem_size; From 292dd287e78e0cbafde9d1522c729349d132d844 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 3 Apr 2024 10:59:53 -0400 Subject: [PATCH 31/63] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is not disabled A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to be cleared. The PIC probe should then print: [ 0.155970] Using NULL legacy PIC However, no such log printed in guest kernel unless PCAT_COMPAT is cleared. Signed-off-by: Xiaoyao Li Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- hw/i386/acpi-common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 20f19269da..0cc2919bb8 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, acpi_table_begin(&table, table_data); /* Local APIC Address */ build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); - build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ + /* Flags. bit 0: PCAT_COMPAT */ + build_append_int_noprefix(table_data, + x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4); for (i = 0; i < apic_ids->len; i++) { pc_madt_cpu_entry(i, apic_ids, table_data, false); From 41a605944e3fecae43ca18ded95ec31f28e0c7fe Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:35 -0500 Subject: [PATCH 32/63] confidential guest support: Add kvm_init() and kvm_reset() in class Different confidential VMs in different architectures all have the same needs to do their specific initialization (and maybe resetting) stuffs with KVM. Currently each of them exposes individual *_kvm_init() functions and let machine code or kvm code to call it. To facilitate the introduction of confidential guest technology from different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() in ConfidentialGuestSupportClass, and expose two helpers functions for invodking them. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index ba2dd4b5df..e5b188cffb 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -23,7 +23,10 @@ #include "qom/object.h" #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" -OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) +OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + ConfidentialGuestSupportClass, + CONFIDENTIAL_GUEST_SUPPORT) + struct ConfidentialGuestSupport { Object parent; @@ -55,8 +58,37 @@ struct ConfidentialGuestSupport { typedef struct ConfidentialGuestSupportClass { ObjectClass parent; + + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); } ConfidentialGuestSupportClass; +static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_init) { + return klass->kvm_init(cgs, errp); + } + + return 0; +} + +static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_reset) { + return klass->kvm_reset(cgs, errp); + } + + return 0; +} + #endif /* !CONFIG_USER_ONLY */ #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ From 637c95b37b106c2eeba313e0abb38ec12e918a59 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:36 -0500 Subject: [PATCH 33/63] i386/sev: Switch to use confidential_guest_kvm_init() Use confidential_guest_kvm_init() instead of calling SEV specific sev_kvm_init(). This allows the introduction of multiple confidential-guest-support subclasses for different x86 vendors. As a bonus, stubs are not needed anymore since there is no direct call from target/i386/kvm/kvm.c to SEV code. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 10 +-- target/i386/kvm/meson.build | 2 - target/i386/kvm/sev-stub.c | 21 ------ target/i386/sev.c | 127 ++++++++++++++++++------------------ target/i386/sev.h | 2 - 5 files changed, 69 insertions(+), 93 deletions(-) delete mode 100644 target/i386/kvm/sev-stub.c diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index f1b59011d1..c64b9562c0 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2543,10 +2543,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) * mechanisms are supported in future (e.g. TDX), they'll need * their own initialization either here or elsewhere. */ - ret = sev_kvm_init(ms->cgs, &local_err); - if (ret < 0) { - error_report_err(local_err); - return ret; + if (ms->cgs) { + ret = confidential_guest_kvm_init(ms->cgs, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } } has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 84d9143e60..e7850981e6 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -7,8 +7,6 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) -i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) - i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c deleted file mode 100644 index 1be5341e8a..0000000000 --- a/target/i386/kvm/sev-stub.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "sev.h" - -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - /* If we get here, cgs must be some non-SEV thing */ - return 0; -} diff --git a/target/i386/sev.c b/target/i386/sev.c index 72930ff0dc..b8f79d34d1 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) sev->kernel_hashes = value; } -static void -sev_guest_class_init(ObjectClass *oc, void *data) -{ - object_class_property_add_str(oc, "sev-device", - sev_guest_get_sev_device, - sev_guest_set_sev_device); - object_class_property_set_description(oc, "sev-device", - "SEV device to use"); - object_class_property_add_str(oc, "dh-cert-file", - sev_guest_get_dh_cert_file, - sev_guest_set_dh_cert_file); - object_class_property_set_description(oc, "dh-cert-file", - "guest owners DH certificate (encoded with base64)"); - object_class_property_add_str(oc, "session-file", - sev_guest_get_session_file, - sev_guest_set_session_file); - object_class_property_set_description(oc, "session-file", - "guest owners session parameters (encoded with base64)"); - object_class_property_add_bool(oc, "kernel-hashes", - sev_guest_get_kernel_hashes, - sev_guest_set_kernel_hashes); - object_class_property_set_description(oc, "kernel-hashes", - "add kernel hashes to guest firmware for measured Linux boot"); -} - -static void -sev_guest_instance_init(Object *obj) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); - sev->policy = DEFAULT_GUEST_POLICY; - object_property_add_uint32_ptr(obj, "policy", &sev->policy, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "handle", &sev->handle, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "reduced-phys-bits", - &sev->reduced_phys_bits, - OBJ_PROP_FLAG_READWRITE); -} - -/* sev guest info */ -static const TypeInfo sev_guest_info = { - .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, - .name = TYPE_SEV_GUEST, - .instance_size = sizeof(SevGuestState), - .instance_finalize = sev_guest_finalize, - .class_init = sev_guest_class_init, - .instance_init = sev_guest_instance_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } -}; - bool sev_enabled(void) { @@ -906,20 +849,15 @@ sev_vm_state_change(void *opaque, bool running, RunState state) } } -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevGuestState *sev - = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + SevGuestState *sev = SEV_GUEST(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; - if (!sev) { - return 0; - } - ret = ram_block_discard_disable(true); if (ret) { error_report("%s: cannot disable RAM discard", __func__); @@ -1384,6 +1322,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return ret; } +static void +sev_guest_class_init(ObjectClass *oc, void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = sev_kvm_init; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, + sev_guest_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); + object_class_property_set_description(oc, "dh-cert-file", + "guest owners DH certificate (encoded with base64)"); + object_class_property_add_str(oc, "session-file", + sev_guest_get_session_file, + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_guest_get_kernel_hashes, + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); +} + +static void +sev_guest_instance_init(Object *obj) +{ + SevGuestState *sev = SEV_GUEST(obj); + + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "policy", &sev->policy, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "handle", &sev->handle, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); +} + +/* sev guest info */ +static const TypeInfo sev_guest_info = { + .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, + .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + static void sev_register_types(void) { diff --git a/target/i386/sev.h b/target/i386/sev.h index e7499c95b1..9e10d09539 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); void sev_es_set_reset_vector(CPUState *cpu); -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); - #endif From 00a238b1a845fd5f0acd771664c5e184a63ed9b6 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:37 -0500 Subject: [PATCH 34/63] ppc/pef: switch to use confidential_guest_kvm_init/reset() Use the unified interface to call confidential guest related kvm_init() and kvm_reset(), to avoid exposing pef specific functions. As a bonus, pef.h goes away since there is no direct call from sPAPR board code to PEF code anymore. Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- hw/ppc/pef.c | 9 ++++++--- hw/ppc/spapr.c | 10 +++++++--- include/hw/ppc/pef.h | 17 ----------------- 3 files changed, 13 insertions(+), 23 deletions(-) delete mode 100644 include/hw/ppc/pef.h diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index d28ed3ba73..47553348b1 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -15,7 +15,6 @@ #include "sysemu/kvm.h" #include "migration/blocker.h" #include "exec/confidential-guest-support.h" -#include "hw/ppc/pef.h" #define TYPE_PEF_GUEST "pef-guest" OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) @@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) #endif } -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { return 0; @@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return kvmppc_svm_init(cgs, errp); } -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) +static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { return 0; @@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, static void pef_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = pef_kvm_init; + klass->kvm_reset = pef_kvm_reset; } static void pef_guest_init(Object *obj) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 36ada4d0ba..533ea0f914 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -75,6 +75,7 @@ #include "hw/virtio/vhost-scsi-common.h" #include "exec/ram_addr.h" +#include "exec/confidential-guest-support.h" #include "hw/usb.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -87,7 +88,6 @@ #include "hw/ppc/spapr_tpm_proxy.h" #include "hw/ppc/spapr_nvdimm.h" #include "hw/ppc/spapr_numa.h" -#include "hw/ppc/pef.h" #include "monitor/monitor.h" @@ -1715,7 +1715,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); } - pef_kvm_reset(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_reset(machine->cgs, &error_fatal); + } spapr_caps_apply(spapr); spapr_nested_reset(spapr); @@ -2841,7 +2843,9 @@ static void spapr_machine_init(MachineState *machine) /* * if Secure VM (PEF) support is configured, then initialize it */ - pef_kvm_init(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_init(machine->cgs, &error_fatal); + } msi_nonbroken = true; diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h deleted file mode 100644 index 707dbe524c..0000000000 --- a/include/hw/ppc/pef.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * PEF (Protected Execution Facility) for POWER support - * - * Copyright Red Hat. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef HW_PPC_PEF_H -#define HW_PPC_PEF_H - -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); - -#endif /* HW_PPC_PEF_H */ From a14a2b0148e657cc526b7a75f2a1937628764e7a Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:38 -0500 Subject: [PATCH 35/63] s390: Switch to use confidential_guest_kvm_init() Use unified confidential_guest_kvm_init() for consistency with other architectures. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- hw/s390x/s390-virtio-ccw.c | 5 ++++- target/s390x/kvm/pv.c | 10 +++++++++- target/s390x/kvm/pv.h | 14 -------------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 7cebbf6c02..4dcc213820 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "exec/ram_addr.h" +#include "exec/confidential-guest-support.h" #include "hw/s390x/s390-virtio-hcall.h" #include "hw/s390x/sclp.h" #include "hw/s390x/s390_flic.h" @@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine) s390_init_cpus(machine); /* Need CPU model to be determined before we can set up PV */ - s390_pv_init(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_init(machine->cgs, &error_fatal); + } s390_flic_init(); diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c index 7ca7faec73..dde836d21a 100644 --- a/target/s390x/kvm/pv.c +++ b/target/s390x/kvm/pv.c @@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) return s390_pv_check_cpus(errp); } -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { return 0; } + if (!kvm_enabled()) { + error_setg(errp, "Protected Virtualization requires KVM"); + return -1; + } + if (!s390_has_feat(S390_FEAT_UNPACK)) { error_setg(errp, "CPU model does not support Protected Virtualization"); @@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, static void s390_pv_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = s390_pv_kvm_init; } static void s390_pv_guest_init(Object *obj) diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h index 5877d28ff1..4b40817439 100644 --- a/target/s390x/kvm/pv.h +++ b/target/s390x/kvm/pv.h @@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } #endif /* CONFIG_KVM */ -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - if (!cgs) { - return 0; - } - if (kvm_enabled()) { - return s390_pv_kvm_init(cgs, errp); - } - - error_setg(errp, "Protected Virtualization requires KVM"); - return -1; -} - #endif /* HW_S390_PV_H */ From 66210a1a30f2384bb59f9dad8d769dba56dd30f1 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Sun, 18 Feb 2024 23:35:02 -0600 Subject: [PATCH 36/63] scripts/update-linux-headers: Add setup_data.h to import list Data structures like struct setup_data have been moved to a separate setup_data.h header which bootparam.h relies on. Add setup_data.h to the cp_portable() list and sync it along with the other header files. Note that currently struct setup_data is stripped away as part of generating bootparam.h, but that handling is no currently needed for setup_data.h since it doesn't pull in many external headers/dependencies. However, QEMU currently redefines struct setup_data in hw/i386/x86.c, so that will need to be removed as part of any header update that pulls in the new setup_data.h to avoid build bisect breakage. Because is the first architecture specific #include in include/standard-headers/, add a new sed substitution to rewrite asm/ include to the standard-headers/asm-* subdirectory for the current architecture. And while at it, remove asm-generic/kvm_para.h from the list of allowed includes: it does not have a matching substitution, and therefore it would not be possible to use it on non-Linux systems where there is no /usr/include/asm-generic/ directory. Signed-off-by: Michael Roth Signed-off-by: Paolo Bonzini --- scripts/update-linux-headers.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index a0006eec6f..d48856f9e2 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -61,7 +61,7 @@ cp_portable() { -e 'linux/const' \ -e 'linux/kernel' \ -e 'linux/sysinfo' \ - -e 'asm-generic/kvm_para' \ + -e 'asm/setup_data.h' \ > /dev/null then echo "Unexpected #include in input file $f". @@ -77,6 +77,7 @@ cp_portable() { -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ -e 's/__bitwise//' \ -e 's/__attribute__((packed))/QEMU_PACKED/' \ -e 's/__inline__/inline/' \ @@ -155,11 +156,14 @@ for arch in $ARCHLIST; do "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" cp_portable "$tmpdir/bootparam.h" \ "$output/include/standard-headers/asm-$arch" + cp_portable "$tmpdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" fi if [ $arch = riscv ]; then cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" fi done +arch= rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" From b40b8eb609d3549ac14aab43849b20f5cba951c9 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 21 Feb 2024 10:51:38 -0600 Subject: [PATCH 37/63] scripts/update-linux-headers: Add bits.h to file imports Signed-off-by: Michael Roth Signed-off-by: Paolo Bonzini --- scripts/update-linux-headers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index d48856f9e2..5f20434d5c 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -169,7 +169,7 @@ rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ - vduse.h iommufd.h; do + vduse.h iommufd.h bits.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done From ab0c7fb22b56523f24d6e127cd4d10ecff67bf85 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Apr 2024 11:46:47 +0200 Subject: [PATCH 38/63] linux-headers: update to current kvm/next Signed-off-by: Paolo Bonzini --- hw/i386/x86.c | 8 - include/standard-headers/asm-x86/bootparam.h | 17 +- include/standard-headers/asm-x86/kvm_para.h | 3 +- include/standard-headers/asm-x86/setup_data.h | 83 +++ include/standard-headers/linux/ethtool.h | 48 ++ include/standard-headers/linux/fuse.h | 39 +- .../linux/input-event-codes.h | 1 + include/standard-headers/linux/virtio_gpu.h | 2 + include/standard-headers/linux/virtio_pci.h | 10 +- include/standard-headers/linux/virtio_snd.h | 154 ++++ linux-headers/asm-arm64/kvm.h | 15 +- linux-headers/asm-arm64/sve_context.h | 11 + linux-headers/asm-generic/bitsperlong.h | 4 + linux-headers/asm-loongarch/kvm.h | 2 - linux-headers/asm-mips/kvm.h | 2 - linux-headers/asm-powerpc/kvm.h | 45 +- linux-headers/asm-riscv/kvm.h | 3 +- linux-headers/asm-s390/kvm.h | 315 +++++++- linux-headers/asm-x86/kvm.h | 328 ++++++++- linux-headers/linux/bits.h | 15 + linux-headers/linux/kvm.h | 689 +----------------- linux-headers/linux/psp-sev.h | 59 ++ linux-headers/linux/vhost.h | 7 + 23 files changed, 1120 insertions(+), 740 deletions(-) create mode 100644 include/standard-headers/asm-x86/setup_data.h create mode 100644 linux-headers/linux/bits.h diff --git a/hw/i386/x86.c b/hw/i386/x86.c index ffbda48917..84a4801977 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -679,14 +679,6 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) return dev; } -struct setup_data { - uint64_t next; - uint32_t type; - uint32_t len; - uint8_t data[]; -} __attribute__((packed)); - - /* * The entry point into the kernel for PVH boot is different from * the native entry point. The PVH entry is defined by the x86/HVM diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h index 0b06d2bff1..b582a105c0 100644 --- a/include/standard-headers/asm-x86/bootparam.h +++ b/include/standard-headers/asm-x86/bootparam.h @@ -2,21 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data/setup_indirect types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 -#define SETUP_APPLE_PROPERTIES 5 -#define SETUP_JAILHOUSE 6 -#define SETUP_CC_BLOB 7 -#define SETUP_IMA 8 -#define SETUP_RNG_SEED 9 -#define SETUP_ENUM_MAX SETUP_RNG_SEED - -#define SETUP_INDIRECT (1<<31) -#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) +#include "standard-headers/asm-x86/setup_data.h" /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -38,6 +24,7 @@ #define XLF_EFI_KEXEC (1<<4) #define XLF_5LEVEL (1<<5) #define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index f0235e58a1..9a011d20f0 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -92,7 +92,7 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) /* MSR_KVM_ASYNC_PF_INT */ -#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) /* MSR_KVM_MIGRATION_CONTROL */ #define KVM_MIGRATION_READY (1 << 0) @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { uint32_t token; uint8_t pad[56]; - uint32_t enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h new file mode 100644 index 0000000000..09355f54c5 --- /dev/null +++ b/include/standard-headers/asm-x86/setup_data.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_SETUP_DATA_H +#define _ASM_X86_SETUP_DATA_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_ENUM_MAX SETUP_RNG_SEED + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +#ifndef __ASSEMBLY__ + +#include "standard-headers/linux/types.h" + +/* extensible setup data list node */ +struct setup_data { + uint64_t next; + uint32_t type; + uint32_t len; + uint8_t data[]; +}; + +/* extensible setup indirect data node */ +struct setup_indirect { + uint32_t type; + uint32_t reserved; /* Reserved, must be set to zero. */ + uint64_t len; + uint64_t addr; +}; + +/* + * The E820 memory region entry of the boot protocol ABI: + */ +struct boot_e820_entry { + uint64_t addr; + uint64_t size; + uint32_t type; +} QEMU_PACKED; + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + struct { + uint16_t version; + uint16_t compatible_version; + } QEMU_PACKED hdr; + struct { + uint16_t pm_timer_address; + uint16_t num_cpus; + uint64_t pci_mmconfig_base; + uint32_t tsc_khz; + uint32_t apic_khz; + uint8_t standard_ioapic; + uint8_t cpu_ids[255]; + } QEMU_PACKED v1; + struct { + uint32_t flags; + } QEMU_PACKED v2; +} QEMU_PACKED; + +/* + * IMA buffer setup data information from the previous kernel during kexec + */ +struct ima_setup_data { + uint64_t addr; + uint64_t size; +} QEMU_PACKED; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index dfb54eff6f..01503784d2 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ + +/* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ +#define GTPU_V4_FLOW 0x13 /* hash only */ +#define GTPU_V6_FLOW 0x14 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ +#define GTPC_V4_FLOW 0x15 /* hash only */ +#define GTPC_V6_FLOW 0x16 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ +#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ +#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ + +/* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ +#define GTPU_EH_V4_FLOW 0x19 /* hash only */ +#define GTPU_EH_V6_FLOW 0x1a /* hash only */ + +/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ +#define GTPU_UL_V4_FLOW 0x1b /* hash only */ +#define GTPU_UL_V6_FLOW 0x1c /* hash only */ +#define GTPU_DL_V4_FLOW 0x1d /* hash only */ +#define GTPU_DL_V6_FLOW 0x1e /* hash only */ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 #define FLOW_MAC_EXT 0x40000000 @@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define RXH_IP_DST (1 << 5) #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index fc0dcd10ae..bac9dbc49f 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -211,6 +211,12 @@ * 7.39 * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures + * + * 7.40 + * - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag + * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag + * - add FUSE_NO_EXPORT_SUPPORT init flag + * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag */ #ifndef _LINUX_FUSE_H @@ -242,7 +248,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 39 +#define FUSE_KERNEL_MINOR_VERSION 40 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -349,6 +355,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_PASSTHROUGH: passthrough read/write io for this open file */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -357,6 +364,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_PASSTHROUGH (1 << 7) /** * INIT request/reply flags @@ -406,6 +414,9 @@ struct fuse_file_lock { * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. + * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support + * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit + * of the request ID indicates resend requests */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -445,6 +456,9 @@ struct fuse_file_lock { #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) +#define FUSE_PASSTHROUGH (1ULL << 37) +#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) +#define FUSE_HAS_RESEND (1ULL << 39) /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP @@ -631,6 +645,7 @@ enum fuse_notify_code { FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_RESEND = 7, FUSE_NOTIFY_CODE_MAX, }; @@ -757,7 +772,7 @@ struct fuse_create_in { struct fuse_open_out { uint64_t fh; uint32_t open_flags; - uint32_t padding; + int32_t backing_id; }; struct fuse_release_in { @@ -873,7 +888,8 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + uint32_t max_stack_depth; + uint32_t unused[6]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -956,6 +972,14 @@ struct fuse_fallocate_in { uint32_t padding; }; +/** + * FUSE request unique ID flag + * + * Indicates whether this is a resend request. The receiver should handle this + * request accordingly. + */ +#define FUSE_UNIQUE_RESEND (1ULL << 63) + struct fuse_in_header { uint32_t len; uint32_t opcode; @@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_backing_map { + int32_t fd; + uint32_t flags; + uint64_t padding; +}; + /* Device ioctls: */ #define FUSE_DEV_IOC_MAGIC 229 #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) +#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ + struct fuse_backing_map) +#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index f6bab08540..2221b0c383 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 2da48d3d4c..2db643ed8f 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 +/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_VENUS 4 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index 3e2bc2c97e..4010216103 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 -struct QEMU_PACKED virtio_admin_cmd_hdr { +struct virtio_admin_cmd_hdr { uint16_t opcode; /* * 1 - SR-IOV @@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { uint64_t group_member_id; }; -struct QEMU_PACKED virtio_admin_cmd_status { +struct virtio_admin_cmd_status { uint16_t status; uint16_t status_qualifier; /* Unused, reserved for future extensions. */ uint8_t reserved2[4]; }; -struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { +struct virtio_admin_cmd_legacy_wr_data { uint8_t offset; /* Starting offset of the register(s) to write. */ uint8_t reserved[7]; uint8_t registers[]; }; -struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { +struct virtio_admin_cmd_legacy_rd_data { uint8_t offset; /* Starting offset of the register(s) to read. */ }; @@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 -struct QEMU_PACKED virtio_admin_cmd_notify_info_data { +struct virtio_admin_cmd_notify_info_data { uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ uint8_t bar; /* BAR of the member or the owner device */ uint8_t padding[6]; diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h index 1af96b9fc6..860f12e0a4 100644 --- a/include/standard-headers/linux/virtio_snd.h +++ b/include/standard-headers/linux/virtio_snd.h @@ -7,6 +7,14 @@ #include "standard-headers/linux/virtio_types.h" +/******************************************************************************* + * FEATURE BITS + */ +enum { + /* device supports control elements */ + VIRTIO_SND_F_CTLS = 0 +}; + /******************************************************************************* * CONFIGURATION SPACE */ @@ -17,6 +25,8 @@ struct virtio_snd_config { uint32_t streams; /* # of available channel maps */ uint32_t chmaps; + /* # of available control elements */ + uint32_t controls; }; enum { @@ -55,6 +65,15 @@ enum { /* channel map control request types */ VIRTIO_SND_R_CHMAP_INFO = 0x0200, + /* control element request types */ + VIRTIO_SND_R_CTL_INFO = 0x0300, + VIRTIO_SND_R_CTL_ENUM_ITEMS, + VIRTIO_SND_R_CTL_READ, + VIRTIO_SND_R_CTL_WRITE, + VIRTIO_SND_R_CTL_TLV_READ, + VIRTIO_SND_R_CTL_TLV_WRITE, + VIRTIO_SND_R_CTL_TLV_COMMAND, + /* jack event types */ VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, VIRTIO_SND_EVT_JACK_DISCONNECTED, @@ -63,6 +82,9 @@ enum { VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, VIRTIO_SND_EVT_PCM_XRUN, + /* control element event types */ + VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, + /* common status codes */ VIRTIO_SND_S_OK = 0x8000, VIRTIO_SND_S_BAD_MSG, @@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; }; +/******************************************************************************* + * CONTROL ELEMENTS MESSAGES + */ +struct virtio_snd_ctl_hdr { + /* VIRTIO_SND_R_CTL_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::controls - 1 */ + uint32_t control_id; +}; + +/* supported roles for control elements */ +enum { + VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, + VIRTIO_SND_CTL_ROLE_VOLUME, + VIRTIO_SND_CTL_ROLE_MUTE, + VIRTIO_SND_CTL_ROLE_GAIN +}; + +/* supported value types for control elements */ +enum { + VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, + VIRTIO_SND_CTL_TYPE_INTEGER, + VIRTIO_SND_CTL_TYPE_INTEGER64, + VIRTIO_SND_CTL_TYPE_ENUMERATED, + VIRTIO_SND_CTL_TYPE_BYTES, + VIRTIO_SND_CTL_TYPE_IEC958 +}; + +/* supported access rights for control elements */ +enum { + VIRTIO_SND_CTL_ACCESS_READ = 0, + VIRTIO_SND_CTL_ACCESS_WRITE, + VIRTIO_SND_CTL_ACCESS_VOLATILE, + VIRTIO_SND_CTL_ACCESS_INACTIVE, + VIRTIO_SND_CTL_ACCESS_TLV_READ, + VIRTIO_SND_CTL_ACCESS_TLV_WRITE, + VIRTIO_SND_CTL_ACCESS_TLV_COMMAND +}; + +struct virtio_snd_ctl_info { + /* common header */ + struct virtio_snd_info hdr; + /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ + uint32_t role; + /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ + uint32_t type; + /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ + uint32_t access; + /* # of members in the element value */ + uint32_t count; + /* index for an element with a non-unique name */ + uint32_t index; + /* name identifier string for the element */ + uint8_t name[44]; + /* additional information about the element's value */ + union { + /* VIRTIO_SND_CTL_TYPE_INTEGER */ + struct { + /* minimum supported value */ + uint32_t min; + /* maximum supported value */ + uint32_t max; + /* fixed step size for value (0 = variable size) */ + uint32_t step; + } integer; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ + struct { + /* minimum supported value */ + uint64_t min; + /* maximum supported value */ + uint64_t max; + /* fixed step size for value (0 = variable size) */ + uint64_t step; + } integer64; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ + struct { + /* # of options supported for value */ + uint32_t items; + } enumerated; + } value; +}; + +struct virtio_snd_ctl_enum_item { + /* option name */ + uint8_t item[64]; +}; + +struct virtio_snd_ctl_iec958 { + /* AES/IEC958 channel status bits */ + uint8_t status[24]; + /* AES/IEC958 subcode bits */ + uint8_t subcode[147]; + /* nothing */ + uint8_t pad; + /* AES/IEC958 subframe bits */ + uint8_t dig_subframe[4]; +}; + +struct virtio_snd_ctl_value { + union { + /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ + uint32_t integer[128]; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ + uint64_t integer64[64]; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ + uint32_t enumerated[128]; + /* VIRTIO_SND_CTL_TYPE_BYTES value */ + uint8_t bytes[512]; + /* VIRTIO_SND_CTL_TYPE_IEC958 value */ + struct virtio_snd_ctl_iec958 iec958; + } value; +}; + +/* supported event reason types */ +enum { + /* element's value has changed */ + VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, + /* element's information has changed */ + VIRTIO_SND_CTL_EVT_MASK_INFO, + /* element's metadata has changed */ + VIRTIO_SND_CTL_EVT_MASK_TLV +}; + +struct virtio_snd_ctl_event { + /* VIRTIO_SND_EVT_CTL_NOTIFY */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::controls - 1 */ + uint16_t control_id; + /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ + uint16_t mask; +}; + #endif /* VIRTIO_SND_IF_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index c59ea55cd8..2af9931ae9 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -37,9 +37,7 @@ #include #include -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -76,11 +74,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ - KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ - KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 @@ -162,6 +160,11 @@ struct kvm_sync_regs { __u64 device_irq_level; }; +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h index 1d0e3e1d09..d1b1ec8cb1 100644 --- a/linux-headers/asm-arm64/sve_context.h +++ b/linux-headers/asm-arm64/sve_context.h @@ -13,6 +13,17 @@ #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ +/* + * Yes, __SVE_VQ_MAX is 512 QUADWORDS. + * + * To help ensure forward portability, this is much larger than the + * current maximum value defined by the SVE architecture. While arrays + * or static allocations can be sized based on this value, watch out! + * It will waste a surprisingly large amount of memory. + * + * Dynamic sizing based on the actual runtime vector length is likely to + * be preferable for most purposes. + */ #define __SVE_VQ_MIN 1 #define __SVE_VQ_MAX 512 diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h index 75f320fa91..1fb4f0c9f2 100644 --- a/linux-headers/asm-generic/bitsperlong.h +++ b/linux-headers/asm-generic/bitsperlong.h @@ -24,4 +24,8 @@ #endif #endif +#ifndef __BITS_PER_LONG_LONG +#define __BITS_PER_LONG_LONG 64 +#endif + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h index 923d0bd382..109785922c 100644 --- a/linux-headers/asm-loongarch/kvm.h +++ b/linux-headers/asm-loongarch/kvm.h @@ -14,8 +14,6 @@ * Some parts derived from the x86 version of this file. */ -#define __KVM_HAVE_READONLY_MEM - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h index edcf717c43..9673dc9cb3 100644 --- a/linux-headers/asm-mips/kvm.h +++ b/linux-headers/asm-mips/kvm.h @@ -20,8 +20,6 @@ * Some parts derived from the x86 version of this file. */ -#define __KVM_HAVE_READONLY_MEM - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 9f18fa090f..1691297a76 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -28,7 +28,6 @@ #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_GUEST_DEBUG /* Not always available, but if it is, this is the correct offset. */ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { #define KVM_XIVE_TIMA_PAGE_OFFSET 0 #define KVM_XIVE_ESB_PAGE_OFFSET 4 +/* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index 7499e88a94..b1c503c295 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -16,7 +16,6 @@ #include #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZVFH, KVM_RISCV_ISA_EXT_ZVFHMIN, KVM_RISCV_ISA_EXT_ZFA, + KVM_RISCV_ISA_EXT_ZTSO, + KVM_RISCV_ISA_EXT_ZACAS, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 023a2763a9..684c4e1205 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -12,7 +12,320 @@ #include #define __KVM_S390 -#define __KVM_HAVE_GUEST_DEBUG + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. + */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* ignored */ + }; +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +#define KVM_S390_MCHK 0xfffe1000u +#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +#define KVM_S390_INT_CPU_TIMER 0xffff1005u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u +#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u + + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u32 pad; + __u64 ext_params2; +}; + +struct kvm_s390_pgm_info { + __u64 trans_exc_code; + __u64 mon_code; + __u64 per_address; + __u32 data_exc_code; + __u16 code; + __u16 mon_class_nr; + __u8 per_code; + __u8 per_atmid; + __u8 exc_access_id; + __u8 per_access_id; + __u8 op_access_id; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +struct kvm_s390_stop_info { + __u32 flags; +}; + +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 pad; + __u8 fixed_logout[16]; +}; + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ + __u32 len; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ +}; + +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; + +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + +enum pv_cmd_info_id { + KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, + KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; +}; + +struct kvm_s390_zpci_op { + /* in */ + __u32 fh; /* target device */ + __u8 op; /* operation to perform */ + __u8 pad[3]; + union { + /* for KVM_S390_ZPCIOP_REG_AEN */ + struct { + __u64 ibv; /* Guest addr of interrupt bit vector */ + __u64 sb; /* Guest addr of summary bit */ + __u32 flags; + __u32 noi; /* Number of interrupts */ + __u8 isc; /* Guest interrupt subclass */ + __u8 sbo; /* Offset of guest summary bit vector */ + __u16 pad; + } reg_aen; + __u64 reserved[8]; + } u; +}; + +/* types for kvm_s390_zpci_op->op */ +#define KVM_S390_ZPCIOP_REG_AEN 0 +#define KVM_S390_ZPCIOP_DEREG_AEN 1 + +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) /* Device control API: s390-specific devices */ #define KVM_DEV_FLIC_GET_ALL_IRQS 1 diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 003fb74534..31c95c2dfe 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -7,6 +7,8 @@ * */ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 @@ -49,7 +50,6 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS -#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -455,8 +455,13 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -524,9 +529,310 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 -#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) +/* for KVM_CAP_MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +/* for KVM_CAP_XEN_HVM */ +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + union { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + __u64 hva; + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. + */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 hva; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u32 pad0; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u32 pad[9]; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u32 pad0; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u32 pad1; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u32 pad2; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u32 pad3; + __u64 session_uaddr; + __u32 session_len; + __u32 pad4; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + /* * Masked event layout. * Bits Description @@ -547,10 +853,10 @@ struct kvm_pmu_event_filter { ((__u64)(!!(exclude)) << 55)) #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ - (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) -#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) + (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ @@ -558,9 +864,11 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ /* x86-specific KVM_EXIT_HYPERCALL flags. */ -#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h new file mode 100644 index 0000000000..d9897771be --- /dev/null +++ b/linux-headers/linux/bits.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* bits.h: Macros for dealing with bitmasks. */ + +#ifndef _LINUX_BITS_H +#define _LINUX_BITS_H + +#define __GENMASK(h, l) \ + (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ + (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) + +#define __GENMASK_ULL(h, l) \ + (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ + (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* _LINUX_BITS_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 17839229b2..038731cdef 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -16,6 +16,11 @@ #define KVM_API_VERSION 12 +/* + * Backwards-compatible definitions. + */ +#define __KVM_HAVE_GUEST_DEBUG + /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -85,43 +90,6 @@ struct kvm_pit_config { #define KVM_PIT_SPEAKER_DUMMY 1 -struct kvm_s390_skeys { - __u64 start_gfn; - __u64 count; - __u64 skeydata_addr; - __u32 flags; - __u32 reserved[9]; -}; - -#define KVM_S390_CMMA_PEEK (1 << 0) - -/** - * kvm_s390_cmma_log - Used for CMMA migration. - * - * Used both for input and output. - * - * @start_gfn: Guest page number to start from. - * @count: Size of the result buffer. - * @flags: Control operation mode via KVM_S390_CMMA_* flags - * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty - * pages are still remaining. - * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set - * in the PGSTE. - * @values: Pointer to the values buffer. - * - * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. - */ -struct kvm_s390_cmma_log { - __u64 start_gfn; - __u32 count; - __u32 flags; - union { - __u64 remaining; - __u64 mask; - }; - __u64 values; -}; - struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 @@ -313,11 +281,6 @@ struct kvm_run { __u32 ipb; } s390_sieic; /* KVM_EXIT_S390_RESET */ -#define KVM_S390_RESET_POR 1 -#define KVM_S390_RESET_CLEAR 2 -#define KVM_S390_RESET_SUBSYSTEM 4 -#define KVM_S390_RESET_CPU_INIT 8 -#define KVM_S390_RESET_IPL 16 __u64 s390_reset_flags; /* KVM_EXIT_S390_UCONTROL */ struct { @@ -532,43 +495,6 @@ struct kvm_translation { __u8 pad[5]; }; -/* for KVM_S390_MEM_OP */ -struct kvm_s390_mem_op { - /* in */ - __u64 gaddr; /* the guest address */ - __u64 flags; /* flags */ - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ - union { - struct { - __u8 ar; /* the access register number */ - __u8 key; /* access key, ignored if flag unset */ - __u8 pad1[6]; /* ignored */ - __u64 old_addr; /* ignored if cmpxchg flag unset */ - }; - __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* ignored */ - }; -}; -/* types for kvm_s390_mem_op->op */ -#define KVM_S390_MEMOP_LOGICAL_READ 0 -#define KVM_S390_MEMOP_LOGICAL_WRITE 1 -#define KVM_S390_MEMOP_SIDA_READ 2 -#define KVM_S390_MEMOP_SIDA_WRITE 3 -#define KVM_S390_MEMOP_ABSOLUTE_READ 4 -#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 -#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 - -/* flags for kvm_s390_mem_op->flags */ -#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) -#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) - -/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ -#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) -#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) - /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -633,124 +559,6 @@ struct kvm_mp_state { __u32 mp_state; }; -struct kvm_s390_psw { - __u64 mask; - __u64 addr; -}; - -/* valid values for type in kvm_s390_interrupt */ -#define KVM_S390_SIGP_STOP 0xfffe0000u -#define KVM_S390_PROGRAM_INT 0xfffe0001u -#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u -#define KVM_S390_RESTART 0xfffe0003u -#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u -#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u -#define KVM_S390_MCHK 0xfffe1000u -#define KVM_S390_INT_CLOCK_COMP 0xffff1004u -#define KVM_S390_INT_CPU_TIMER 0xffff1005u -#define KVM_S390_INT_VIRTIO 0xffff2603u -#define KVM_S390_INT_SERVICE 0xffff2401u -#define KVM_S390_INT_EMERGENCY 0xffff1201u -#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u -/* Anything below 0xfffe0000u is taken by INT_IO */ -#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ - (((schid)) | \ - ((ssid) << 16) | \ - ((cssid) << 18) | \ - ((ai) << 26)) -#define KVM_S390_INT_IO_MIN 0x00000000u -#define KVM_S390_INT_IO_MAX 0xfffdffffu -#define KVM_S390_INT_IO_AI_MASK 0x04000000u - - -struct kvm_s390_interrupt { - __u32 type; - __u32 parm; - __u64 parm64; -}; - -struct kvm_s390_io_info { - __u16 subchannel_id; - __u16 subchannel_nr; - __u32 io_int_parm; - __u32 io_int_word; -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u32 pad; - __u64 ext_params2; -}; - -struct kvm_s390_pgm_info { - __u64 trans_exc_code; - __u64 mon_code; - __u64 per_address; - __u32 data_exc_code; - __u16 code; - __u16 mon_class_nr; - __u8 per_code; - __u8 per_atmid; - __u8 exc_access_id; - __u8 per_access_id; - __u8 op_access_id; -#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 -#define KVM_S390_PGM_FLAGS_ILC_0 0x02 -#define KVM_S390_PGM_FLAGS_ILC_1 0x04 -#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 -#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 - __u8 flags; - __u8 pad[2]; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; - -struct kvm_s390_extcall_info { - __u16 code; -}; - -struct kvm_s390_emerg_info { - __u16 code; -}; - -#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 -struct kvm_s390_stop_info { - __u32 flags; -}; - -struct kvm_s390_mchk_info { - __u64 cr14; - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 pad; - __u8 fixed_logout[16]; -}; - -struct kvm_s390_irq { - __u64 type; - union { - struct kvm_s390_io_info io; - struct kvm_s390_ext_info ext; - struct kvm_s390_pgm_info pgm; - struct kvm_s390_emerg_info emerg; - struct kvm_s390_extcall_info extcall; - struct kvm_s390_prefix_info prefix; - struct kvm_s390_stop_info stop; - struct kvm_s390_mchk_info mchk; - char reserved[64]; - } u; -}; - -struct kvm_s390_irq_state { - __u64 buf; - __u32 flags; /* will stay unused for compatibility reasons */ - __u32 len; - __u32 reserved[4]; /* will stay unused for compatibility reasons */ -}; - /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -806,50 +614,6 @@ struct kvm_enable_cap { __u8 pad[64]; }; -/* for KVM_PPC_GET_PVINFO */ - -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) - -struct kvm_ppc_pvinfo { - /* out */ - __u32 flags; - __u32 hcall[4]; - __u8 pad[108]; -}; - -/* for KVM_PPC_GET_SMMU_INFO */ -#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 - -struct kvm_ppc_one_page_size { - __u32 page_shift; /* Page shift (or 0) */ - __u32 pte_enc; /* Encoding in the HPTE (>>12) */ -}; - -struct kvm_ppc_one_seg_page_size { - __u32 page_shift; /* Base page shift of segment (or 0) */ - __u32 slb_enc; /* SLB encoding for BookS */ - struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 -#define KVM_PPC_1T_SEGMENTS 0x00000002 -#define KVM_PPC_NO_HASH 0x00000004 - -struct kvm_ppc_smmu_info { - __u64 flags; - __u32 slb_size; - __u16 data_keys; /* # storage keys supported for data */ - __u16 instr_keys; /* # storage keys supported for instructions */ - struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ -struct kvm_ppc_resize_hpt { - __u64 flags; - __u32 shift; - __u32 pad; -}; - #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -919,9 +683,7 @@ struct kvm_ppc_resize_hpt { /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 -#ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 -#endif #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif @@ -1152,8 +914,6 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_irqchip { __u32 irqchip; __u32 pin; @@ -1218,42 +978,6 @@ struct kvm_irq_routing { struct kvm_irq_routing_entry entries[]; }; -#endif - -#ifdef KVM_CAP_MCE -/* x86 MCE */ -struct kvm_x86_mce { - __u64 status; - __u64 addr; - __u64 misc; - __u64 mcg_status; - __u8 bank; - __u8 pad1[7]; - __u64 pad2[3]; -}; -#endif - -#ifdef KVM_CAP_XEN_HVM -#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) -#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) -#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) -#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) -#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) -#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) -#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) -#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) - -struct kvm_xen_hvm_config { - __u32 flags; - __u32 msr; - __u64 blob_addr_32; - __u64 blob_addr_64; - __u8 blob_size_32; - __u8 blob_size_64; - __u8 pad2[30]; -}; -#endif - #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) /* * Available with KVM_CAP_IRQFD_RESAMPLE @@ -1438,11 +1162,6 @@ struct kvm_vfio_spapr_tce { struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ -struct kvm_s390_ucas_mapping { - __u64 user_addr; - __u64 vcpu_addr; - __u64 length; -}; #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) @@ -1637,89 +1356,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -struct kvm_s390_pv_sec_parm { - __u64 origin; - __u64 length; -}; - -struct kvm_s390_pv_unp { - __u64 addr; - __u64 size; - __u64 tweak; -}; - -enum pv_cmd_dmp_id { - KVM_PV_DUMP_INIT, - KVM_PV_DUMP_CONFIG_STOR_STATE, - KVM_PV_DUMP_COMPLETE, - KVM_PV_DUMP_CPU, -}; - -struct kvm_s390_pv_dmp { - __u64 subcmd; - __u64 buff_addr; - __u64 buff_len; - __u64 gaddr; /* For dump storage state */ - __u64 reserved[4]; -}; - -enum pv_cmd_info_id { - KVM_PV_INFO_VM, - KVM_PV_INFO_DUMP, -}; - -struct kvm_s390_pv_info_dump { - __u64 dump_cpu_buffer_len; - __u64 dump_config_mem_buffer_per_1m; - __u64 dump_config_finalize_len; -}; - -struct kvm_s390_pv_info_vm { - __u64 inst_calls_list[4]; - __u64 max_cpus; - __u64 max_guests; - __u64 max_guest_addr; - __u64 feature_indication; -}; - -struct kvm_s390_pv_info_header { - __u32 id; - __u32 len_max; - __u32 len_written; - __u32 reserved; -}; - -struct kvm_s390_pv_info { - struct kvm_s390_pv_info_header header; - union { - struct kvm_s390_pv_info_dump dump; - struct kvm_s390_pv_info_vm vm; - }; -}; - -enum pv_cmd_id { - KVM_PV_ENABLE, - KVM_PV_DISABLE, - KVM_PV_SET_SEC_PARMS, - KVM_PV_UNPACK, - KVM_PV_VERIFY, - KVM_PV_PREP_RESET, - KVM_PV_UNSHARE_ALL, - KVM_PV_INFO, - KVM_PV_DUMP, - KVM_PV_ASYNC_CLEANUP_PREPARE, - KVM_PV_ASYNC_CLEANUP_PERFORM, -}; - -struct kvm_pv_cmd { - __u32 cmd; /* Command to be executed */ - __u16 rc; /* Ultravisor return code */ - __u16 rrc; /* Ultravisor return reason code */ - __u64 data; /* Data or address */ - __u32 flags; /* flags for future extensions. Must be 0 for now */ - __u32 reserved[3]; -}; - /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) @@ -1733,58 +1369,6 @@ struct kvm_pv_cmd { #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) -struct kvm_xen_hvm_attr { - __u16 type; - __u16 pad[3]; - union { - __u8 long_mode; - __u8 vector; - __u8 runstate_update_flag; - struct { - __u64 gfn; -#define KVM_XEN_INVALID_GFN ((__u64)-1) - } shared_info; - struct { - __u32 send_port; - __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ - __u32 flags; -#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) -#define KVM_XEN_EVTCHN_UPDATE (1 << 1) -#define KVM_XEN_EVTCHN_RESET (1 << 2) - /* - * Events sent by the guest are either looped back to - * the guest itself (potentially on a different port#) - * or signalled via an eventfd. - */ - union { - struct { - __u32 port; - __u32 vcpu; - __u32 priority; - } port; - struct { - __u32 port; /* Zero for eventfd */ - __s32 fd; - } eventfd; - __u32 padding[4]; - } deliver; - } evtchn; - __u32 xen_version; - __u64 pad[8]; - } u; -}; - - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 -#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 -#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 -#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ -#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 - /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) @@ -1795,242 +1379,6 @@ struct kvm_xen_hvm_attr { #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) -struct kvm_xen_vcpu_attr { - __u16 type; - __u16 pad[3]; - union { - __u64 gpa; -#define KVM_XEN_INVALID_GPA ((__u64)-1) - __u64 pad[8]; - struct { - __u64 state; - __u64 state_entry_time; - __u64 time_running; - __u64 time_runnable; - __u64 time_blocked; - __u64 time_offline; - } runstate; - __u32 vcpu_id; - struct { - __u32 port; - __u32 priority; - __u64 expires_ns; - } timer; - __u8 vector; - } u; -}; - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 -#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 -#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 - -/* Secure Encrypted Virtualization command */ -enum sev_cmd_id { - /* Guest initialization commands */ - KVM_SEV_INIT = 0, - KVM_SEV_ES_INIT, - /* Guest launch commands */ - KVM_SEV_LAUNCH_START, - KVM_SEV_LAUNCH_UPDATE_DATA, - KVM_SEV_LAUNCH_UPDATE_VMSA, - KVM_SEV_LAUNCH_SECRET, - KVM_SEV_LAUNCH_MEASURE, - KVM_SEV_LAUNCH_FINISH, - /* Guest migration commands (outgoing) */ - KVM_SEV_SEND_START, - KVM_SEV_SEND_UPDATE_DATA, - KVM_SEV_SEND_UPDATE_VMSA, - KVM_SEV_SEND_FINISH, - /* Guest migration commands (incoming) */ - KVM_SEV_RECEIVE_START, - KVM_SEV_RECEIVE_UPDATE_DATA, - KVM_SEV_RECEIVE_UPDATE_VMSA, - KVM_SEV_RECEIVE_FINISH, - /* Guest status and debug commands */ - KVM_SEV_GUEST_STATUS, - KVM_SEV_DBG_DECRYPT, - KVM_SEV_DBG_ENCRYPT, - /* Guest certificates commands */ - KVM_SEV_CERT_EXPORT, - /* Attestation report */ - KVM_SEV_GET_ATTESTATION_REPORT, - /* Guest Migration Extension */ - KVM_SEV_SEND_CANCEL, - - KVM_SEV_NR_MAX, -}; - -struct kvm_sev_cmd { - __u32 id; - __u64 data; - __u32 error; - __u32 sev_fd; -}; - -struct kvm_sev_launch_start { - __u32 handle; - __u32 policy; - __u64 dh_uaddr; - __u32 dh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_launch_update_data { - __u64 uaddr; - __u32 len; -}; - - -struct kvm_sev_launch_secret { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -struct kvm_sev_launch_measure { - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_guest_status { - __u32 handle; - __u32 policy; - __u32 state; -}; - -struct kvm_sev_dbg { - __u64 src_uaddr; - __u64 dst_uaddr; - __u32 len; -}; - -struct kvm_sev_attestation_report { - __u8 mnonce[16]; - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_send_start { - __u32 policy; - __u64 pdh_cert_uaddr; - __u32 pdh_cert_len; - __u64 plat_certs_uaddr; - __u32 plat_certs_len; - __u64 amd_certs_uaddr; - __u32 amd_certs_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_send_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -struct kvm_sev_receive_start { - __u32 handle; - __u32 policy; - __u64 pdh_uaddr; - __u32 pdh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_receive_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) - -struct kvm_assigned_pci_dev { - __u32 assigned_dev_id; - __u32 busnr; - __u32 devfn; - __u32 flags; - __u32 segnr; - union { - __u32 reserved[11]; - }; -}; - -#define KVM_DEV_IRQ_HOST_INTX (1 << 0) -#define KVM_DEV_IRQ_HOST_MSI (1 << 1) -#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) - -#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) -#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) -#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) - -#define KVM_DEV_IRQ_HOST_MASK 0x00ff -#define KVM_DEV_IRQ_GUEST_MASK 0xff00 - -struct kvm_assigned_irq { - __u32 assigned_dev_id; - __u32 host_irq; /* ignored (legacy field) */ - __u32 guest_irq; - __u32 flags; - union { - __u32 reserved[12]; - }; -}; - -struct kvm_assigned_msix_nr { - __u32 assigned_dev_id; - __u16 entry_nr; - __u16 padding; -}; - -#define KVM_MAX_MSIX_PER_DEV 256 -struct kvm_assigned_msix_entry { - __u32 assigned_dev_id; - __u32 gsi; - __u16 entry; /* The index of entry in the MSI-X table */ - __u16 padding[3]; -}; - -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) - -/* Available with KVM_CAP_ARM_USER_IRQ */ - -/* Bits for run->s.regs.device_irq_level */ -#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) -#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) -#define KVM_ARM_DEV_PMU (1 << 2) - -struct kvm_hyperv_eventfd { - __u32 conn_id; - __s32 fd; - __u32 flags; - __u32 padding[3]; -}; - -#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff -#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) - #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) @@ -2176,33 +1524,6 @@ struct kvm_stats_desc { /* Available with KVM_CAP_S390_ZPCI_OP */ #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) -struct kvm_s390_zpci_op { - /* in */ - __u32 fh; /* target device */ - __u8 op; /* operation to perform */ - __u8 pad[3]; - union { - /* for KVM_S390_ZPCIOP_REG_AEN */ - struct { - __u64 ibv; /* Guest addr of interrupt bit vector */ - __u64 sb; /* Guest addr of summary bit */ - __u32 flags; - __u32 noi; /* Number of interrupts */ - __u8 isc; /* Guest interrupt subclass */ - __u8 sbo; /* Offset of guest summary bit vector */ - __u16 pad; - } reg_aen; - __u64 reserved[8]; - } u; -}; - -/* types for kvm_s390_zpci_op->op */ -#define KVM_S390_ZPCIOP_REG_AEN 0 -#define KVM_S390_ZPCIOP_DEREG_AEN 1 - -/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ -#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) - /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h index bcb21339ee..c3046c6bff 100644 --- a/linux-headers/linux/psp-sev.h +++ b/linux-headers/linux/psp-sev.h @@ -28,6 +28,9 @@ enum { SEV_PEK_CERT_IMPORT, SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ SEV_GET_ID2, + SNP_PLATFORM_STATUS, + SNP_COMMIT, + SNP_SET_CONFIG, SEV_MAX, }; @@ -69,6 +72,12 @@ typedef enum { SEV_RET_RESOURCE_LIMIT, SEV_RET_SECURE_DATA_INVALID, SEV_RET_INVALID_KEY = 0x27, + SEV_RET_INVALID_PAGE_SIZE, + SEV_RET_INVALID_PAGE_STATE, + SEV_RET_INVALID_MDATA_ENTRY, + SEV_RET_INVALID_PAGE_OWNER, + SEV_RET_INVALID_PAGE_AEAD_OFLOW, + SEV_RET_RMP_INIT_REQUIRED, SEV_RET_MAX, } sev_ret_code; @@ -155,6 +164,56 @@ struct sev_user_data_get_id2 { __u32 length; /* In/Out */ } __attribute__((packed)); +/** + * struct sev_user_data_snp_status - SNP status + * + * @api_major: API major version + * @api_minor: API minor version + * @state: current platform state + * @is_rmp_initialized: whether RMP is initialized or not + * @rsvd: reserved + * @build_id: firmware build id for the API version + * @mask_chip_id: whether chip id is present in attestation reports or not + * @mask_chip_key: whether attestation reports are signed or not + * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded + * @rsvd1: reserved + * @guest_count: the number of guest currently managed by the firmware + * @current_tcb_version: current TCB version + * @reported_tcb_version: reported TCB version + */ +struct sev_user_data_snp_status { + __u8 api_major; /* Out */ + __u8 api_minor; /* Out */ + __u8 state; /* Out */ + __u8 is_rmp_initialized:1; /* Out */ + __u8 rsvd:7; + __u32 build_id; /* Out */ + __u32 mask_chip_id:1; /* Out */ + __u32 mask_chip_key:1; /* Out */ + __u32 vlek_en:1; /* Out */ + __u32 rsvd1:29; + __u32 guest_count; /* Out */ + __u64 current_tcb_version; /* Out */ + __u64 reported_tcb_version; /* Out */ +} __attribute__((packed)); + +/** + * struct sev_user_data_snp_config - system wide configuration value for SNP. + * + * @reported_tcb: the TCB version to report in the guest attestation report. + * @mask_chip_id: whether chip id is present in attestation reports or not + * @mask_chip_key: whether attestation reports are signed or not + * @rsvd: reserved + * @rsvd1: reserved + */ +struct sev_user_data_snp_config { + __u64 reported_tcb ; /* In */ + __u32 mask_chip_id:1; /* In */ + __u32 mask_chip_key:1; /* In */ + __u32 rsvd:30; /* In */ + __u8 rsvd1[52]; +} __attribute__((packed)); + /** * struct sev_issue_cmd - SEV ioctl parameters * diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 649560c685..bea6973906 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -227,4 +227,11 @@ */ #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ struct vhost_vring_state) + +/* Get the queue size of a specific virtqueue. + * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ + struct vhost_vring_state) #endif From 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 17:45:56 -0400 Subject: [PATCH 39/63] runstate: skip initial CPU reset if reset is not actually possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, the system reset is concluded by a call to cpu_synchronize_all_post_reset() in order to sync any changes that the machine reset callback applied to the CPU state. However, for VMs with encrypted state such as SEV-ES guests (currently the only case of guests with non-resettable CPUs) this cannot be done, because guest state has already been finalized by machine-init-done notifiers. cpu_synchronize_all_post_reset() does nothing on these guests, and actually we would like to make it fail if called once guest has been encrypted. So, assume that boards that support non-resettable CPUs do not touch CPU state and that all such setup is done before, at the time of cpu_synchronize_all_post_init(). Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- system/runstate.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/system/runstate.c b/system/runstate.c index d6ab860eca..cb4905a40f 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -501,7 +501,20 @@ void qemu_system_reset(ShutdownCause reason) default: qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } - cpu_synchronize_all_post_reset(); + + /* + * Some boards use the machine reset callback to point CPUs to the firmware + * entry point. Assume that this is not the case for boards that support + * non-resettable CPUs (currently used only for confidential guests), in + * which case cpu_synchronize_all_post_init() is enough because + * it does _more_ than cpu_synchronize_all_post_reset(). + */ + if (cpus_are_resettable()) { + cpu_synchronize_all_post_reset(); + } else { + assert(runstate_check(RUN_STATE_PRELAUNCH)); + } + vm_set_suspended(false); } From 5c3131c392f84c660033d511ec39872d8beb4b1e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 14:41:10 -0400 Subject: [PATCH 40/63] KVM: track whether guest state is encrypted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the guest state is encrypted, in which case they do nothing. For the new API using VM types, instead, the ioctls will fail which is a safer and more robust approach. The new API will be the only one available for SEV-SNP and TDX, but it is also usable for SEV and SEV-ES. In preparation for that, require architecture-specific KVM code to communicate the point at which guest state is protected (which must be after kvm_cpu_synchronize_post_init(), though that might change in the future in order to suppor migration). From that point, skip reading registers so that cpu->vcpu_dirty is never true: if it ever becomes true, kvm_arch_put_registers() will fail miserably. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 17 ++++++++++++++--- include/sysemu/kvm.h | 2 ++ include/sysemu/kvm_int.h | 1 + target/i386/sev.c | 1 + 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index d58916e33a..d6ebadbf38 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void) static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->vcpu_dirty) { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { int ret = kvm_arch_get_registers(cpu); if (ret) { error_report("Failed to get registers: %s", strerror(-ret)); @@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) void kvm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->vcpu_dirty) { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) void kvm_cpu_synchronize_post_init(CPUState *cpu) { - run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); + if (!kvm_state->guest_state_protected) { + /* + * This runs before the machine_init_done notifiers, and is the last + * opportunity to synchronize the state of confidential guests. + */ + run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); + } } static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) @@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) query_stats_schema_vcpu(first_cpu, &stats_args); } } + +void kvm_mark_guest_state_protected(void) +{ + kvm_state->guest_state_protected = true; +} diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 2cba899270..14b1ddb3be 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); +void kvm_mark_guest_state_protected(void); + /** * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page * reported for the VM. diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 94488d2c1a..227b61fec3 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -87,6 +87,7 @@ struct KVMState bool kernel_irqchip_required; OnOffAuto kernel_irqchip_split; bool sync_mmu; + bool guest_state_protected; uint64_t manual_dirty_log_protect; /* The man page (and posix) say ioctl numbers are signed int, but * they're not. Linux, glibc and *BSD all treat ioctl numbers as diff --git a/target/i386/sev.c b/target/i386/sev.c index b8f79d34d1..c49a8fd55e 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) if (ret) { exit(1); } + kvm_mark_guest_state_protected(); } /* query the measurement blob length */ From a99c0c66ebe7d8db3af6f16689ade9375247e43e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 14:41:33 -0400 Subject: [PATCH 41/63] KVM: remove kvm_arch_cpu_check_are_resettable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Board reset requires writing a fresh CPU state. As far as KVM is concerned, the only thing that blocks reset is that CPU state is encrypted; therefore, kvm_cpus_are_resettable() can simply check if that is the case. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-accel-ops.c | 2 +- accel/kvm/kvm-all.c | 5 ----- include/sysemu/kvm.h | 10 ---------- target/arm/kvm.c | 5 ----- target/i386/kvm/kvm.c | 5 ----- target/loongarch/kvm/kvm.c | 5 ----- target/mips/kvm.c | 5 ----- target/ppc/kvm.c | 5 ----- target/riscv/kvm/kvm-cpu.c | 5 ----- target/s390x/kvm/kvm.c | 5 ----- 10 files changed, 1 insertion(+), 51 deletions(-) diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index f5ac643fca..94c828ac8d 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) static bool kvm_cpus_are_resettable(void) { - return !kvm_enabled() || kvm_cpu_check_are_resettable(); + return !kvm_enabled() || !kvm_state->guest_state_protected; } #ifdef TARGET_KVM_HAVE_GUEST_DEBUG diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index d6ebadbf38..5a2fbee32a 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void) s->coalesced_flush_in_progress = false; } -bool kvm_cpu_check_are_resettable(void) -{ - return kvm_arch_cpu_check_are_resettable(); -} - static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 14b1ddb3be..bd247f3a23 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); /* Notify resamplefd for EOI of specific interrupts. */ void kvm_resample_fd_notify(int gsi); -/** - * kvm_cpu_check_are_resettable - return whether CPUs can be reset - * - * Returns: true: CPUs are resettable - * false: CPUs are not resettable - */ -bool kvm_cpu_check_are_resettable(void); - -bool kvm_arch_cpu_check_are_resettable(void); - bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); diff --git a/target/arm/kvm.c b/target/arm/kvm.c index ab85d628a8..21ebbf3b8f 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return (data - 32) & 0xffff; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c64b9562c0..c8fdab40fc 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5619,11 +5619,6 @@ bool kvm_has_waitpkg(void) return has_msr_umwait; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return !sev_es_enabled(); -} - #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index d630cc39cb..8224d94333 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { int ret = 0; diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 6c52e59f55..a631ab544f 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine) return -1; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8231feb2d4..63930d4a77 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2956,11 +2956,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 6a6c6cae80..49d2f3ad58 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - static int aia_mode; static const char *kvm_aia_mode_str(uint64_t mode) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 4ce809c5d4..4dcd757cdc 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2622,11 +2622,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) kvm_s390_vcpu_interrupt(cpu, &irq); } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - int kvm_s390_get_zpci_op(void) { return cap_zpci_op; From d82e9c843d662f13821026618aba936eda31a6c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 11:07:43 -0400 Subject: [PATCH 42/63] target/i386: introduce x86-confidential-guest Introduce a common superclass for x86 confidential guest implementations. It will extend ConfidentialGuestSupportClass with a method that provides the VM type to be passed to KVM_CREATE_VM. Signed-off-by: Paolo Bonzini --- target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++ target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++ target/i386/meson.build | 2 +- target/i386/sev.c | 6 ++--- 4 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 target/i386/confidential-guest.c create mode 100644 target/i386/confidential-guest.h diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c new file mode 100644 index 0000000000..b3727845ad --- /dev/null +++ b/target/i386/confidential-guest.c @@ -0,0 +1,33 @@ +/* + * QEMU Confidential Guest support + * + * Copyright (C) 2024 Red Hat, Inc. + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "confidential-guest.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, + x86_confidential_guest, + X86_CONFIDENTIAL_GUEST, + CONFIDENTIAL_GUEST_SUPPORT) + +static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) +{ +} + +static void x86_confidential_guest_init(Object *obj) +{ +} + +static void x86_confidential_guest_finalize(Object *obj) +{ +} diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h new file mode 100644 index 0000000000..ca12d5a8fb --- /dev/null +++ b/target/i386/confidential-guest.h @@ -0,0 +1,40 @@ +/* + * x86-specific confidential guest methods. + * + * Copyright (c) 2024 Red Hat Inc. + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef TARGET_I386_CG_H +#define TARGET_I386_CG_H + +#include "qom/object.h" + +#include "exec/confidential-guest-support.h" + +#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" + +OBJECT_DECLARE_TYPE(X86ConfidentialGuest, + X86ConfidentialGuestClass, + X86_CONFIDENTIAL_GUEST) + +struct X86ConfidentialGuest { + /* */ + ConfidentialGuestSupport parent_obj; +}; + +/** + * X86ConfidentialGuestClass: + * + * Class to be implemented by confidential-guest-support concrete objects + * for the x86 target. + */ +struct X86ConfidentialGuestClass { + /* */ + ConfidentialGuestSupportClass parent; +}; +#endif diff --git a/target/i386/meson.build b/target/i386/meson.build index 7c74bfa859..8abce725f8 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -6,7 +6,7 @@ i386_ss.add(files( 'xsave_helper.c', 'cpu-dump.c', )) -i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) diff --git a/target/i386/sev.c b/target/i386/sev.c index c49a8fd55e..ebe36d4c10 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -35,7 +35,7 @@ #include "monitor/monitor.h" #include "monitor/hmp-target.h" #include "qapi/qapi-commands-misc-target.h" -#include "exec/confidential-guest-support.h" +#include "confidential-guest.h" #include "hw/i386/pc.h" #include "exec/address-spaces.h" @@ -54,7 +54,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) * -machine ...,memory-encryption=sev0 */ struct SevGuestState { - ConfidentialGuestSupport parent_obj; + X86ConfidentialGuest parent_obj; /* configuration parameters */ char *sev_device; @@ -1372,7 +1372,7 @@ sev_guest_instance_init(Object *obj) /* sev guest info */ static const TypeInfo sev_guest_info = { - .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, + .parent = TYPE_X86_CONFIDENTIAL_GUEST, .name = TYPE_SEV_GUEST, .instance_size = sizeof(SevGuestState), .instance_finalize = sev_guest_finalize, From ee88612df1e8d6c2bfec75bff3f9482ea44acec1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 Mar 2024 15:29:33 +0100 Subject: [PATCH 43/63] target/i386: Implement mc->kvm_type() to get VM type KVM is introducing a new API to create confidential guests, which will be used by TDX and SEV-SNP but is also available for SEV and SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to identify which confidential computing technology to use. Since there are no other expected uses of VM types, delegate mc->kvm_type() for x86 boards to the confidential-guest-support object pointed to by ms->cgs. For example, if a sev-guest object is specified to confidential-guest-support, like, qemu -machine ...,confidential-guest-support=sev0 \ -object sev-guest,id=sev0,... it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM is supported, and if so use them together with the KVM_SEV_INIT2 function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT. This is a preparatory work towards TDX and SEV-SNP support, but it will also enable support for VMSA features such as DebugSwap, which are only available via KVM_SEV_INIT2. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- hw/i386/x86.c | 11 ++++++++ target/i386/confidential-guest.h | 19 ++++++++++++++ target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++ target/i386/kvm/kvm_i386.h | 2 ++ 4 files changed, 76 insertions(+) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 84a4801977..3d5b51e92d 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1381,6 +1381,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, qapi_free_SgxEPCList(list); } +static int x86_kvm_type(MachineState *ms, const char *vm_type) +{ + /* + * No x86 machine has a kvm-type property. If one is added that has + * it, it should call kvm_get_vm_type() directly or not use it at all. + */ + assert(vm_type == NULL); + return kvm_enabled() ? kvm_get_vm_type(ms) : 0; +} + static void x86_machine_initfn(Object *obj) { X86MachineState *x86ms = X86_MACHINE(obj); @@ -1405,6 +1415,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = x86_cpu_index_to_props; mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->kvm_type = x86_kvm_type; x86mc->save_tsc_khz = true; x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index ca12d5a8fb..532e172a60 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -36,5 +36,24 @@ struct X86ConfidentialGuest { struct X86ConfidentialGuestClass { /* */ ConfidentialGuestSupportClass parent; + + /* */ + int (*kvm_type)(X86ConfidentialGuest *cg); }; + +/** + * x86_confidential_guest_kvm_type: + * + * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler. + */ +static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->kvm_type) { + return klass->kvm_type(cg); + } else { + return 0; + } +} #endif diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c8fdab40fc..ae76924bc6 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -31,6 +31,7 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "kvm_i386.h" +#include "../confidential-guest.h" #include "sev.h" #include "xen-emu.h" #include "hyperv.h" @@ -161,6 +162,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; static RateLimit bus_lock_ratelimit_ctrl; static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); +static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", +}; + +bool kvm_is_vm_type_supported(int type) +{ + uint32_t machine_types; + + /* + * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM + * is always supported + */ + if (type == KVM_X86_DEFAULT_VM) { + return true; + } + + machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), + KVM_CAP_VM_TYPES); + return !!(machine_types & BIT(type)); +} + +int kvm_get_vm_type(MachineState *ms) +{ + int kvm_type = KVM_X86_DEFAULT_VM; + + if (ms->cgs) { + if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { + error_report("configuration type %s not supported for x86 guests", + object_get_typename(OBJECT(ms->cgs))); + exit(1); + } + kvm_type = x86_confidential_guest_kvm_type( + X86_CONFIDENTIAL_GUEST(ms->cgs)); + } + + if (!kvm_is_vm_type_supported(kvm_type)) { + error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); + exit(1); + } + + return kvm_type; +} + bool kvm_has_smm(void) { return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 30fedcffea..6b44844d95 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); bool kvm_enable_sgx_provisioning(KVMState *s); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +int kvm_get_vm_type(MachineState *ms); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_after_reset_vcpu(X86CPU *cpu); void kvm_arch_do_init_vcpu(X86CPU *cs); @@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); void kvm_synchronize_all_tsc(void); From 663e2f443e5722370708ce2f4c27d94a2087d2d3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 Mar 2024 15:30:25 +0100 Subject: [PATCH 44/63] target/i386: SEV: use KVM_SEV_INIT2 if possible Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP. These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have several advantages: - sharing the initialization sequence with SEV-SNP and TDX - allowing arguments including the set of desired VMSA features - protection against invalid use of KVM_GET/SET_* ioctls for guests with encrypted state If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported, fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the default x86 VM type). Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 2 ++ target/i386/sev.c | 41 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index ae76924bc6..c5943605ee 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -164,6 +164,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); static const char *vm_type_name[] = { [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", }; bool kvm_is_vm_type_supported(int type) diff --git a/target/i386/sev.c b/target/i386/sev.c index ebe36d4c10..9dab4060b8 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -26,6 +26,7 @@ #include "qemu/error-report.h" #include "crypto/hash.h" #include "sysemu/kvm.h" +#include "kvm/kvm_i386.h" #include "sev.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" @@ -56,6 +57,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) struct SevGuestState { X86ConfidentialGuest parent_obj; + int kvm_type; + /* configuration parameters */ char *sev_device; uint32_t policy; @@ -850,6 +853,26 @@ sev_vm_state_change(void *opaque, bool running, RunState state) } } +static int sev_kvm_type(X86ConfidentialGuest *cg) +{ + SevGuestState *sev = SEV_GUEST(cg); + int kvm_type; + + if (sev->kvm_type != -1) { + goto out; + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + if (kvm_is_vm_type_supported(kvm_type)) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; + } + +out: + return sev->kvm_type; +} + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { SevGuestState *sev = SEV_GUEST(cgs); @@ -929,13 +952,19 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) __func__); goto err; } - cmd = KVM_SEV_ES_INIT; - } else { - cmd = KVM_SEV_INIT; } trace_kvm_sev_init(); - ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } + if (ret) { error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -1327,8 +1356,10 @@ static void sev_guest_class_init(ObjectClass *oc, void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", sev_guest_get_sev_device, @@ -1357,6 +1388,8 @@ sev_guest_instance_init(Object *obj) { SevGuestState *sev = SEV_GUEST(obj); + sev->kvm_type = -1; + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); sev->policy = DEFAULT_GUEST_POLICY; object_property_add_uint32_ptr(obj, "policy", &sev->policy, From 023267334da375226720e62963df9545aa8fc2fd Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 9 Apr 2024 18:07:41 -0500 Subject: [PATCH 45/63] i386/sev: Add 'legacy-vm-type' parameter for SEV guest objects QEMU will currently automatically make use of the KVM_SEV_INIT2 API for initializing SEV and SEV-ES guests verses the older KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces. However, the older interfaces will silently avoid sync'ing FPU/XSAVE state to the VMSA prior to encryption, thus relying on behavior and measurements that assume the related fields to be allow zero. With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in measurements changes and, theoretically, behaviorial changes, though the latter are unlikely to be seen in practice. To allow a smooth transition to the newer interface, while still providing a mechanism to maintain backward compatibility with VMs created using the older interfaces, provide a new command-line parameter: -object sev-guest,legacy-vm-type=true,... and have it default to false. Signed-off-by: Michael Roth Message-ID: <20240409230743.962513-2-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- qapi/qom.json | 11 ++++++++++- target/i386/sev.c | 18 +++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index 85e6b4f84a..38dde6d785 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -898,6 +898,14 @@ # designated guest firmware page for measured boot with -kernel # (default: false) (since 6.2) # +# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +# The newer KVM_SEV_INIT2 interface syncs additional vCPU +# state when initializing the VMSA structures, which will +# result in a different guest measurement. Set this to +# maintain compatibility with older QEMU or kernel versions +# that rely on legacy KVM_SEV_INIT behavior. +# (default: false) (since 9.1) +# # Since: 2.12 ## { 'struct': 'SevGuestProperties', @@ -908,7 +916,8 @@ '*handle': 'uint32', '*cbitpos': 'uint32', 'reduced-phys-bits': 'uint32', - '*kernel-hashes': 'bool' } } + '*kernel-hashes': 'bool', + '*legacy-vm-type': 'bool' } } ## # @ThreadContextProperties: diff --git a/target/i386/sev.c b/target/i386/sev.c index 9dab4060b8..f4ee317cb0 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -67,6 +67,7 @@ struct SevGuestState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + bool legacy_vm_type; /* runtime state */ uint32_t handle; @@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) sev->kernel_hashes = value; } +static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +{ + return SEV_GUEST(obj)->legacy_vm_type; +} + +static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +{ + SEV_GUEST(obj)->legacy_vm_type = value; +} + bool sev_enabled(void) { @@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) } kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - if (kvm_is_vm_type_supported(kvm_type)) { + if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { sev->kvm_type = kvm_type; } else { sev->kvm_type = KVM_X86_DEFAULT_VM; @@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_kernel_hashes); object_class_property_set_description(oc, "kernel-hashes", "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); } static void From ea7fbd37537b3a598335c21ccb2ea674630fc810 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 9 Apr 2024 18:07:43 -0500 Subject: [PATCH 46/63] hw/i386/sev: Use legacy SEV VM types for older machine types Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for creating SEV/SEV-ES going forward. However, this API results in guest measurement changes which are generally not expected for users of these older guest types and can cause disruption if they switch to a newer QEMU/kernel version. Avoid this by continuing to use the older KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types. Signed-off-by: Michael Roth Message-ID: <20240409230743.962513-4-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 1 + target/i386/sev.c | 1 + 2 files changed, 2 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 41909f7bd7..08c7de416f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -80,6 +80,7 @@ GlobalProperty pc_compat_9_0[] = { { TYPE_X86_CPU, "guest-phys-bits", "0" }, + { "sev-guest", "legacy-vm-type", "true" }, }; const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); diff --git a/target/i386/sev.c b/target/i386/sev.c index f4ee317cb0..d30b68c11e 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj) object_property_add_uint32_ptr(obj, "reduced-phys-bits", &sev->reduced_phys_bits, OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); } /* sev guest info */ From 72853afc638b3e28779c86dd05da2f3bb149fe2c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:36:25 -0500 Subject: [PATCH 47/63] trace/kvm: Split address space and slot id in trace_kvm_set_user_memory() The upper 16 bits of kvm_userspace_memory_region::slot are address space id. Parse it separately in trace_kvm_set_user_memory(). Signed-off-by: Xiaoyao Li Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 5 +++-- accel/kvm/trace-events | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 5a2fbee32a..ed50c80b1e 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -303,8 +303,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); slot->old_flags = mem.flags; err: - trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, - mem.memory_size, mem.userspace_addr, ret); + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, + mem.userspace_addr, ret); if (ret < 0) { error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index a25902597b..9f599abc17 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" -kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" +kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" kvm_dirty_ring_full(int id) "vcpu %d" From 0811baed49010a9b651b8029ab6b9828b09a884f Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:06 -0500 Subject: [PATCH 48/63] kvm: Introduce support for memory_attributes Introduce the helper functions to set the attributes of a range of memory to private or shared. This is necessary to notify KVM the private/shared attribute of each gpa range. KVM needs the information to decide the GPA needs to be mapped at hva-based shared memory or guest_memfd based private memory. Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-11-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ include/sysemu/kvm.h | 4 ++++ 2 files changed, 36 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index ed50c80b1e..db0b1a16ed 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -91,6 +91,7 @@ bool kvm_msi_use_devid; static bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; +static uint64_t kvm_supported_memory_attributes; static hwaddr kvm_max_slot_size = ~0; static const KVMCapabilityInfo kvm_required_capabilites[] = { @@ -1266,6 +1267,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) kvm_max_slot_size = max_slot_size; } +static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) +{ + struct kvm_memory_attributes attrs; + int r; + + assert((attr & kvm_supported_memory_attributes) == attr); + attrs.attributes = attr; + attrs.address = start; + attrs.size = size; + attrs.flags = 0; + + r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); + if (r) { + error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " + "with attr 0x%" PRIx64 " error '%s'", + start, size, attr, strerror(errno)); + } + return r; +} + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) +{ + return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) +{ + return kvm_set_memory_attributes(start, size, 0); +} + /* Called with KVMMemoryListener.slots_lock held */ static void kvm_set_phys_mem(KVMMemoryListener *kml, MemoryRegionSection *section, bool add) @@ -2387,6 +2418,7 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index bd247f3a23..594ae9b460 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void); * reported for the VM. */ bool kvm_hwpoisoned_mem(void); + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + #endif From 15f7a80c49cb3637f62fa37fa4a17da913bd91ff Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:02 -0500 Subject: [PATCH 49/63] RAMBlock: Add support of KVM private guest memfd Add KVM guest_memfd support to RAMBlock so both normal hva based memory and kvm guest memfd based private memory can be associated in one RAMBlock. Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to create private guest_memfd during RAMBlock setup. Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd is more flexible and extensible than simply relying on the VM type because in the future we may have the case that not all the memory of a VM need guest memfd. As a benefit, it also avoid getting MachineState in memory subsystem. Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of confidential guests, such as TDX VM. How and when to set it for memory backends will be implemented in the following patches. Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has KVM guest_memfd allocated. Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Message-ID: <20240320083945.991426-7-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ accel/stubs/kvm-stub.c | 5 +++++ include/exec/memory.h | 20 +++++++++++++++++--- include/exec/ram_addr.h | 2 +- include/exec/ramblock.h | 1 + include/sysemu/kvm.h | 2 ++ system/memory.c | 5 +++++ system/physmem.c | 34 +++++++++++++++++++++++++++++++--- 8 files changed, 90 insertions(+), 7 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index db0b1a16ed..1b7bbd838c 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -92,6 +92,7 @@ static bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; static uint64_t kvm_supported_memory_attributes; +static bool kvm_guest_memfd_supported; static hwaddr kvm_max_slot_size = ~0; static const KVMCapabilityInfo kvm_required_capabilites[] = { @@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms) } kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_guest_memfd_supported = + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && + (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); @@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void) { kvm_state->guest_state_protected = true; } + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) +{ + int fd; + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + if (!kvm_guest_memfd_supported) { + error_setg(errp, "KVM does not support guest_memfd"); + return -1; + } + + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); + if (fd < 0) { + error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); + return -1; + } + + return fd; +} diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index ca38172884..8e0eb22e61 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void) { return false; } + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) +{ + return -ENOSYS; +} diff --git a/include/exec/memory.h b/include/exec/memory.h index 8626a355b3..679a847685 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { /* RAM FD is opened read-only */ #define RAM_READONLY_FD (1 << 11) +/* RAM can be private that has kvm guest memfd backend */ +#define RAM_GUEST_MEMFD (1 << 12) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. - * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, + * RAM_GUEST_MEMFD. * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the @@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, * (getpagesize()) will be used. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @path: the path in which to allocate the RAM. * @offset: offset within the file referenced by path * @errp: pointer to Error*, to store an error if it happens. @@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, * @size: size of the region. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @fd: the fd to mmap. * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. @@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) */ bool memory_region_is_protected(MemoryRegion *mr); +/** + * memory_region_has_guest_memfd: check whether a memory region has guest_memfd + * associated + * + * Returns %true if a memory region's ram_block has valid guest_memfd assigned. + * + * @mr: the memory region being queried + */ +bool memory_region_has_guest_memfd(MemoryRegion *mr); + /** * memory_region_get_iommu: check whether a memory region is an iommu * diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index de45ba7bc9..07c8f86375 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); * @mr: the memory region where the ram block is * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @mem_path or @fd: specify the backing file or device * @offset: Offset into target file * @errp: pointer to Error*, to store an error if it happens diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 848915ea5b..459c8917de 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -41,6 +41,7 @@ struct RAMBlock { QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; int fd; uint64_t fd_offset; + int guest_memfd; size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 594ae9b460..217f3fe17b 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void); */ bool kvm_hwpoisoned_mem(void); +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); diff --git a/system/memory.c b/system/memory.c index a229a79988..c756950c0c 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr) return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); } +bool memory_region_has_guest_memfd(MemoryRegion *mr) +{ + return mr->ram_block && mr->ram_block->guest_memfd >= 0; +} + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) { uint8_t mask = mr->dirty_log_mask; diff --git a/system/physmem.c b/system/physmem.c index a4fe3d2bf8..f5dfa20e57 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) const bool shared = qemu_ram_is_shared(new_block); RAMBlock *block; RAMBlock *last_block = NULL; + bool free_on_error = false; ram_addr_t old_ram_size, new_ram_size; Error *err = NULL; @@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) return; } memory_try_enable_merging(new_block->host, new_block->max_length); + free_on_error = true; + } + } + + if (new_block->flags & RAM_GUEST_MEMFD) { + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { + qemu_mutex_unlock_ramlist(); + goto out_free; } } @@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) ram_block_notify_add(new_block->host, new_block->used_length, new_block->max_length); } + return; + +out_free: + if (free_on_error) { + qemu_anon_ram_free(new_block->host, new_block->max_length); + new_block->host = NULL; + } } #ifdef CONFIG_POSIX @@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, /* Just support these ram flags by now. */ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | - RAM_READONLY_FD)) == 0); + RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = ram_flags; + new_block->guest_memfd = -1; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, errp); if (!new_block->host) { @@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, int align; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | - RAM_NORESERVE)) == 0); + RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); align = qemu_real_host_page_size(); @@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, new_block->max_length = max_size; assert(max_size >= size); new_block->fd = -1; + new_block->guest_memfd = -1; new_block->page_size = qemu_real_host_page_size(); new_block->host = host; new_block->flags = ram_flags; @@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, Error **errp) { - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); } @@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block) } else { qemu_anon_ram_free(block->host, block->max_length); } + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); + } + g_free(block); } From ce5a983233b4ca94ced88c9581014346509b5c71 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 20 Mar 2024 03:39:05 -0500 Subject: [PATCH 50/63] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that backend'ed both by hva-based shared memory and guest memfd based private memory. Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-10-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++------- accel/kvm/trace-events | 2 +- include/sysemu/kvm_int.h | 2 ++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 1b7bbd838c..0386d4901f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -284,35 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) { KVMState *s = kvm_state; - struct kvm_userspace_memory_region mem; + struct kvm_userspace_memory_region2 mem; int ret; mem.slot = slot->slot | (kml->as_id << 16); mem.guest_phys_addr = slot->start_addr; mem.userspace_addr = (unsigned long)slot->ram; mem.flags = slot->flags; + mem.guest_memfd = slot->guest_memfd; + mem.guest_memfd_offset = slot->guest_memfd_offset; if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) { /* Set the slot size to 0 before setting the slot to the desired * value. This is needed based on KVM commit 75d61fbc. */ mem.memory_size = 0; - ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + + if (kvm_guest_memfd_supported) { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); + } else { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + } if (ret < 0) { goto err; } } mem.memory_size = slot->memory_size; - ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + if (kvm_guest_memfd_supported) { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); + } else { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + } slot->old_flags = mem.flags; err: trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, mem.guest_phys_addr, mem.memory_size, - mem.userspace_addr, ret); + mem.userspace_addr, mem.guest_memfd, + mem.guest_memfd_offset, ret); if (ret < 0) { - error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," - " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", - __func__, mem.slot, slot->start_addr, - (uint64_t)mem.memory_size, strerror(errno)); + if (kvm_guest_memfd_supported) { + error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 "," + " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," + " guest_memfd_offset=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, mem.flags, + mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, + strerror(errno)); + } else { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, strerror(errno)); + } } return ret; } @@ -467,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr) if (readonly && kvm_readonly_mem_allowed) { flags |= KVM_MEM_READONLY; } + if (memory_region_has_guest_memfd(mr)) { + assert(kvm_guest_memfd_supported); + flags |= KVM_MEM_GUEST_MEMFD; + } return flags; } @@ -1394,6 +1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, mem->ram_start_offset = ram_start_offset; mem->ram = ram; mem->flags = kvm_mem_flags(mr); + mem->guest_memfd = mr->ram_block->guest_memfd; + mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; + kvm_slot_init_dirty_bitmap(mem); err = kvm_set_user_memory_region(kml, mem, true); if (err) { diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 9f599abc17..e8c52cb9e7 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" -kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" +kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" kvm_dirty_ring_full(int id) "vcpu %d" diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 227b61fec3..3f3d13f816 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -30,6 +30,8 @@ typedef struct KVMSlot int as_id; /* Cache of the offset in ram address space */ ram_addr_t ram_start_offset; + int guest_memfd; + hwaddr guest_memfd_offset; } KVMSlot; typedef struct KVMMemoryUpdate { From bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:11 -0500 Subject: [PATCH 51/63] kvm/memory: Make memory type private by default if it has guest memfd backend KVM side leaves the memory to shared by default, which may incur the overhead of paging conversion on the first visit of each page. Because the expectation is that page is likely to private for the VMs that require private memory (has guest memfd). Explicitly set the memory to private when memory region has valid guest memfd backend. Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-16-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 0386d4901f..f49b2b95b5 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1431,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, strerror(-err)); abort(); } + + if (memory_region_has_guest_memfd(mr)) { + err = kvm_set_memory_attributes_private(start_addr, slot_size); + if (err) { + error_report("%s: failed to set memory attribute private: %s", + __func__, strerror(-err)); + exit(1); + } + } + start_addr += slot_size; ram_start_offset += slot_size; ram += slot_size; From 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:03 -0500 Subject: [PATCH 52/63] HostMem: Add mechanism to opt in kvm guest memfd via MachineState Add a new member "guest_memfd" to memory backends. When it's set to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm guest_memfd will be allocated during RAMBlock allocation. Memory backend's @guest_memfd is wired with @require_guest_memfd field of MachineState. It avoid looking up the machine in phymem.c. MachineState::require_guest_memfd is supposed to be set by any VMs that requires KVM guest memfd as private memory, e.g., TDX VM. Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Message-ID: <20240320083945.991426-8-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- backends/hostmem-file.c | 1 + backends/hostmem-memfd.c | 1 + backends/hostmem-ram.c | 1 + backends/hostmem.c | 1 + hw/core/machine.c | 5 +++++ include/hw/boards.h | 2 ++ include/sysemu/hostmem.h | 1 + 7 files changed, 12 insertions(+) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index ac3e433cbd..3c69db7946 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; ram_flags |= fb->is_pmem ? RAM_PMEM : 0; ram_flags |= RAM_NAMED_FILE; return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 3923ea9364..745ead0034 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); } diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c index d121249f0f..f7d81af783 100644 --- a/backends/hostmem-ram.c +++ b/backends/hostmem-ram.c @@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, errp); diff --git a/backends/hostmem.c b/backends/hostmem.c index 81a72ce40b..eb9682b4a8 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj) /* TODO: convert access to globals to compat properties */ backend->merge = machine_mem_merge(machine); backend->dump = machine_dump_guest_core(machine); + backend->guest_memfd = machine_require_guest_memfd(machine); backend->reserve = true; backend->prealloc_threads = machine->smp.cpus; } diff --git a/hw/core/machine.c b/hw/core/machine.c index a92bec2314..582c2df37a 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1201,6 +1201,11 @@ bool machine_mem_merge(MachineState *machine) return machine->mem_merge; } +bool machine_require_guest_memfd(MachineState *machine) +{ + return machine->require_guest_memfd; +} + static char *cpu_slot_to_string(const CPUArchId *cpu) { GString *s = g_string_new(NULL); diff --git a/include/hw/boards.h b/include/hw/boards.h index 50e0cf4278..69c1ba45cf 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); +bool machine_require_guest_memfd(MachineState *machine); HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, @@ -370,6 +371,7 @@ struct MachineState { char *dt_compatible; bool dump_guest_core; bool mem_merge; + bool require_guest_memfd; bool usb; bool usb_disabled; char *firmware; diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 0e411aaa29..04b884bf42 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -74,6 +74,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool prealloc, is_mapped, share, reserve; + bool guest_memfd; uint32_t prealloc_threads; ThreadContext *prealloc_context; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); From 852f0048f3ea9f14de18eb279a99fccb6d250e8f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Mar 2024 17:45:29 +0100 Subject: [PATCH 53/63] RAMBlock: make guest_memfd require uncoordinated discard Some subsystems like VFIO might disable ram block discard, but guest_memfd uses discard operations to implement conversions between private and shared memory. Because of this, sequences like the following can result in stale IOMMU mappings: 1. allocate shared page 2. convert page shared->private 3. discard shared page 4. convert page private->shared 5. allocate shared page 6. issue DMA operations against that shared page This is not a use-after-free, because after step 3 VFIO is still pinning the page. However, DMA operations in step 6 will hit the old mapping that was allocated in step 1. Address this by taking ram_block_discard_is_enabled() into account when deciding whether or not to discard pages. Since kvm_convert_memory()/guest_memfd doesn't implement a RamDiscardManager handler to convey and replay discard operations, this is a case of uncoordinated discard, which is blocked/released by ram_block_discard_require(). Interestingly, this function had no use so far. Alternative approaches would be to block discard of shared pages, but this would cause guests to consume twice the memory if they use VFIO; or to implement a RamDiscardManager and only block uncoordinated discard, i.e. use ram_block_coordinated_discard_require(). [Commit message mostly by Michael Roth ] Signed-off-by: Paolo Bonzini --- system/physmem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/system/physmem.c b/system/physmem.c index f5dfa20e57..5ebcf5be11 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) assert(kvm_enabled()); assert(new_block->guest_memfd < 0); + if (ram_block_discard_require(true) < 0) { + error_setg_errno(errp, errno, + "cannot set up private guest memory: discard currently blocked"); + error_append_hint(errp, "Are you using assigned devices?\n"); + goto out_free; + } + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, 0, errp); if (new_block->guest_memfd < 0) { @@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block) if (block->guest_memfd >= 0) { close(block->guest_memfd); + ram_block_discard_require(false); } g_free(block); From b2e9426c04fdd32d93a3a37db6b0c2e67c88c335 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:07 -0500 Subject: [PATCH 54/63] physmem: Introduce ram_block_discard_guest_memfd_range() When memory page is converted from private to shared, the original private memory is back'ed by guest_memfd. Introduce ram_block_discard_guest_memfd_range() for discarding memory in guest_memfd. Based on a patch by Isaku Yamahata . Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-12-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/exec/cpu-common.h | 2 ++ system/physmem.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 6346df17ce..6d5318895a 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length); #endif diff --git a/system/physmem.c b/system/physmem.c index 5ebcf5be11..c3d04ca921 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -3721,6 +3721,29 @@ err: return ret; } +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length) +{ + int ret = -1; + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); + + if (ret) { + ret = -errno; + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); + } +#else + ret = -ENOSYS; + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); +#endif + + return ret; +} + bool ramblock_is_pmem(RAMBlock *rb) { return rb->flags & RAM_PMEM; From c15e5684071d93174e446be318f49d8d59b15d6d Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 20 Mar 2024 03:39:08 -0500 Subject: [PATCH 55/63] kvm: handle KVM_EXIT_MEMORY_FAULT Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory conversion on the RAMBlock to turn the memory into desired attribute, switching between private and shared. Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when KVM_EXIT_MEMORY_FAULT happens. Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has guest_memfd memory backend. Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is added. When page is converted from shared to private, the original shared memory can be discarded via ram_block_discard_range(). Note, shared memory can be discarded only when it's not back'ed by hugetlb because hugetlb is supposed to be pre-allocated and no need for discarding. Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-13-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++----- accel/kvm/trace-events | 2 + include/sysemu/kvm.h | 2 + 3 files changed, 92 insertions(+), 10 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f49b2b95b5..9eef2c6400 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2900,6 +2900,69 @@ static void kvm_eat_signals(CPUState *cpu) } while (sigismember(&chkset, SIG_IPI)); } +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) +{ + MemoryRegionSection section; + ram_addr_t offset; + MemoryRegion *mr; + RAMBlock *rb; + void *addr; + int ret = -1; + + trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); + + if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || + !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { + return -1; + } + + if (!size) { + return -1; + } + + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { + return -1; + } + + if (!memory_region_has_guest_memfd(mr)) { + error_report("Converting non guest_memfd backed memory region " + "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + goto out_unref; + } + + if (to_private) { + ret = kvm_set_memory_attributes_private(start, size); + } else { + ret = kvm_set_memory_attributes_shared(start, size); + } + if (ret) { + goto out_unref; + } + + addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; + rb = qemu_ram_block_from_host(addr, false, &offset); + + if (to_private) { + if (rb->page_size != qemu_real_host_page_size()) { + /* + * shared memory is backed by hugetlb, which is supposed to be + * pre-allocated and doesn't need to be discarded + */ + goto out_unref; + } + ret = ram_block_discard_range(rb, offset, size); + } else { + ret = ram_block_discard_guest_memfd_range(rb, offset, size); + } + +out_unref: + memory_region_unref(mr); + return ret; +} + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -2967,18 +3030,20 @@ int kvm_cpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } - fprintf(stderr, "error: kvm run failed %s\n", - strerror(-run_ret)); + if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { + fprintf(stderr, "error: kvm run failed %s\n", + strerror(-run_ret)); #ifdef TARGET_PPC - if (run_ret == -EBUSY) { - fprintf(stderr, - "This is probably because your SMT is enabled.\n" - "VCPU can only run on primary threads with all " - "secondary threads offline.\n"); - } + if (run_ret == -EBUSY) { + fprintf(stderr, + "This is probably because your SMT is enabled.\n" + "VCPU can only run on primary threads with all " + "secondary threads offline.\n"); + } #endif - ret = -1; - break; + ret = -1; + break; + } } trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); @@ -3061,6 +3126,19 @@ int kvm_cpu_exec(CPUState *cpu) break; } break; + case KVM_EXIT_MEMORY_FAULT: + trace_kvm_memory_fault(run->memory_fault.gpa, + run->memory_fault.size, + run->memory_fault.flags); + if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) { + error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, + (uint64_t)run->memory_fault.flags); + ret = -1; + break; + } + ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, + run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); + break; default: ret = kvm_arch_handle_exit(cpu, run); break; diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index e8c52cb9e7..681ccb667d 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -31,3 +31,5 @@ kvm_cpu_exec(void) "" kvm_interrupt_exit_request(void) "" kvm_io_window_exit(void) "" kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 +kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" +kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 217f3fe17b..47f9e8be1b 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); + #endif From c5d9425ef4da9f43fc0903905ad415456d1ab843 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 29 Feb 2024 01:36:54 -0500 Subject: [PATCH 56/63] kvm/tdx: Don't complain when converting vMMIO region to shared Because vMMIO region needs to be shared region, guest TD may explicitly convert such region from private to shared. Don't complain such conversion. Signed-off-by: Isaku Yamahata Signed-off-by: Xiaoyao Li Message-ID: <20240229063726.610065-34-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 9eef2c6400..0911154bf8 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2927,9 +2927,22 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) } if (!memory_region_has_guest_memfd(mr)) { - error_report("Converting non guest_memfd backed memory region " - "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", - start, size, to_private ? "private" : "shared"); + /* + * Because vMMIO region must be shared, guest TD may convert vMMIO + * region to shared explicitly. Don't complain such case. See + * memory_region_type() for checking if the region is MMIO region. + */ + if (!to_private && + !memory_region_is_ram(mr) && + !memory_region_is_ram_device(mr) && + !memory_region_is_rom(mr) && + !memory_region_is_romd(mr)) { + ret = 0; + } else { + error_report("Convert non guest_memfd backed memory region " + "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + } goto out_unref; } From 565f4768bb9cf840b2f8cca41483bb91aa3196a3 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 29 Feb 2024 01:36:55 -0500 Subject: [PATCH 57/63] kvm/tdx: Ignore memory conversion to shared of unassigned region TDX requires vMMIO region to be shared. For KVM, MMIO region is the region which kvm memslot isn't assigned to (except in-kernel emulation). qemu has the memory region for vMMIO at each device level. While OVMF issues MapGPA(to-shared) conservatively on 32bit PCI MMIO region, qemu doesn't find corresponding vMMIO region because it's before PCI device allocation and memory_region_find() finds the device region, not PCI bus region. It's safe to ignore MapGPA(to-shared) because when guest accesses those region they use GPA with shared bit set for vMMIO. Ignore memory conversion request of non-assigned region to shared and return success. Otherwise OVMF is confused and panics there. Signed-off-by: Isaku Yamahata Signed-off-by: Xiaoyao Li Message-ID: <20240229063726.610065-35-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 0911154bf8..d7281b93f3 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2923,6 +2923,18 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) section = memory_region_find(get_system_memory(), start, size); mr = section.mr; if (!mr) { + /* + * Ignore converting non-assigned region to shared. + * + * TDX requires vMMIO region to be shared to inject #VE to guest. + * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region, + * and vIO-APIC 0xFEC00000 4K page. + * OVMF assigns 32bit PCI MMIO region to + * [top of low memory: typically 2GB=0xC000000, 0xFC00000) + */ + if (!to_private) { + return 0; + } return -1; } From 7502ffb2f3240247e9426fd73b8422e5be54501a Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 27 Mar 2024 18:39:49 +0800 Subject: [PATCH 58/63] target/i386/host-cpu: Consolidate the use of warn_report_once() Use warn_report_once() to get rid of the static local variable "warned". Signed-off-by: Zhao Liu Message-ID: <20240327103951.3853425-2-zhao1.liu@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/host-cpu.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index 92ecb7254b..280e427c01 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -55,18 +55,15 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) { uint32_t host_phys_bits = host_cpu_phys_bits(); uint32_t phys_bits = cpu->phys_bits; - static bool warned; /* * Print a warning if the user set it to a value that's not the * host value. */ - if (phys_bits != host_phys_bits && phys_bits != 0 && - !warned) { - warn_report("Host physical bits (%u)" - " does not match phys-bits property (%u)", - host_phys_bits, phys_bits); - warned = true; + if (phys_bits != host_phys_bits && phys_bits != 0) { + warn_report_once("Host physical bits (%u)" + " does not match phys-bits property (%u)", + host_phys_bits, phys_bits); } if (cpu->host_phys_bits) { From 8e3991ebc865e659e32195286caf0ca14ffcf806 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 27 Mar 2024 18:39:50 +0800 Subject: [PATCH 59/63] target/i386/cpu: Consolidate the use of warn_report_once() The difference between error_printf() and error_report() is the latter may contain more information, such as the name of the program ("qemu-system-x86_64"). Thus its variant error_report_once() and warn_report()'s variant warn_report_once() can be used here to print the information only once without a static local variable "ht_warned". Signed-off-by: Zhao Liu Message-ID: <20240327103951.3853425-3-zhao1.liu@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index eda15b0d4c..2845298378 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7367,7 +7367,6 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) X86CPUClass *xcc = X86_CPU_GET_CLASS(dev); CPUX86State *env = &cpu->env; Error *local_err = NULL; - static bool ht_warned; unsigned requested_lbr_fmt; #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) @@ -7610,13 +7609,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) */ if (IS_AMD_CPU(env) && !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && - cs->nr_threads > 1 && !ht_warned) { - warn_report("This family of AMD CPU doesn't support " - "hyperthreading(%d)", - cs->nr_threads); - error_printf("Please configure -smp options properly" - " or try enabling topoext feature.\n"); - ht_warned = true; + cs->nr_threads > 1) { + warn_report_once("This family of AMD CPU doesn't support " + "hyperthreading(%d).", cs->nr_threads); + error_report_once("Please configure -smp options properly" + " or try enabling topoext feature."); } #ifndef CONFIG_USER_ONLY From aec202cb0eb800049c85428ba31566a9ef634cfc Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 27 Mar 2024 18:39:51 +0800 Subject: [PATCH 60/63] target/i386/cpu: Merge the warning and error messages for AMD HT check Currently, the difference between warn_report_once() and error_report_once() is the former has the "warning:" prefix, while the latter does not have a similar level prefix. At the meantime, considering that there is no error handling logic here, and the purpose of error_report_once() is only to prompt the user with an abnormal message, there is no need to use an error-level message here, and instead we can just use a warning. Therefore, downgrade the message in error_report_once() to warning, and merge it into the previous warn_report_once(). Signed-off-by: Zhao Liu Message-ID: <20240327103951.3853425-4-zhao1.liu@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2845298378..fd6af0d763 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7611,9 +7611,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && cs->nr_threads > 1) { warn_report_once("This family of AMD CPU doesn't support " - "hyperthreading(%d).", cs->nr_threads); - error_report_once("Please configure -smp options properly" - " or try enabling topoext feature."); + "hyperthreading(%d). Please configure -smp " + "options properly or try enabling topoext " + "feature.", cs->nr_threads); } #ifndef CONFIG_USER_ONLY From 94da7b6e9ad8da2b0cb2ef86a64d9fa1d6c8320c Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Thu, 18 Apr 2024 18:07:16 +0800 Subject: [PATCH 61/63] accel/tcg/icount-common: Consolidate the use of warn_report_once() Use warn_report_once() to get rid of the static local variable "notified". Signed-off-by: Zhao Liu Message-ID: <20240418100716.1085491-1-zhao1.liu@linux.intel.com> Signed-off-by: Paolo Bonzini --- accel/tcg/icount-common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/accel/tcg/icount-common.c b/accel/tcg/icount-common.c index a4a747d1dc..8d3d3a7e9d 100644 --- a/accel/tcg/icount-common.c +++ b/accel/tcg/icount-common.c @@ -336,10 +336,8 @@ void icount_start_warp_timer(void) deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, ~QEMU_TIMER_ATTR_EXTERNAL); if (deadline < 0) { - static bool notified; - if (!icount_sleep && !notified) { - warn_report("icount sleep disabled and no active timers"); - notified = true; + if (!icount_sleep) { + warn_report_once("icount sleep disabled and no active timers"); } return; } From 9c05071719ca3a479b677e58db8ab0c5ad3f9b83 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Apr 2024 11:14:31 +0200 Subject: [PATCH 62/63] pythondeps.toml: warn about updates needed to docs/requirements.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs/requirements.txt is expected by readthedocs and should be in sync with pythondeps.toml. Add a comment to both. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- docs/requirements.txt | 3 +++ pythondeps.toml | 1 + 2 files changed, 4 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 691e5218ec..02583f209a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,5 @@ +# Used by readthedocs.io +# Should be in sync with the "installed" key of pythondeps.toml + sphinx==5.3.0 sphinx_rtd_theme==1.1.1 diff --git a/pythondeps.toml b/pythondeps.toml index 0e88415999..9c16602d30 100644 --- a/pythondeps.toml +++ b/pythondeps.toml @@ -22,6 +22,7 @@ meson = { accepted = ">=0.63.0", installed = "1.2.3", canary = "meson" } [docs] +# Please keep the installed versions in sync with docs/requirements.txt sphinx = { accepted = ">=1.6", installed = "5.3.0", canary = "sphinx-build" } sphinx_rtd_theme = { accepted = ">=0.5", installed = "1.1.1" } From 7653b44534d3267fa63ebc9d7221eaa7b48bf5ae Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Fri, 19 Apr 2024 20:51:47 +0100 Subject: [PATCH 63/63] target/i386/translate.c: always write 32-bits for SGDT and SIDT The various Intel CPU manuals claim that SGDT and SIDT can write either 24-bits or 32-bits depending upon the operand size, but this is incorrect. Not only do the Intel CPU manuals give contradictory information between processor revisions, but this information doesn't even match real-life behaviour. In fact, tests on real hardware show that the CPU always writes 32-bits for SGDT and SIDT, and this behaviour is required for at least OS/2 Warp and WFW 3.11 with Win32s to function correctly. Remove the masking applied due to the operand size for SGDT and SIDT so that the TCG behaviour matches the behaviour on real hardware. Signed-off-by: Mark Cave-Ayland Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2198 -- MCA: Whilst I don't have a copy of OS/2 Warp handy, I've confirmed that this patch fixes the issue in WFW 3.11 with Win32s. For more technical information I highly recommend the excellent write-up at https://www.os2museum.com/wp/sgdtsidt-fiction-and-reality/. Message-ID: <20240419195147.434894-1-mark.cave-ayland@ilande.co.uk> Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 76a42c679c..c05d9e5225 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -5846,9 +5846,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, gdt.base)); - if (dflag == MO_16) { - tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); - } + /* + * NB: Despite a confusing description in Intel CPU documentation, + * all 32-bits are written regardless of operand size. + */ gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0); break; @@ -5901,9 +5902,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.base)); - if (dflag == MO_16) { - tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); - } + /* + * NB: Despite a confusing description in Intel CPU documentation, + * all 32-bits are written regardless of operand size. + */ gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0); break;