From f31d29bbb79af25e69cff653292c3d760d333639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Mon, 15 Jan 2024 12:08:07 +0100 Subject: [PATCH 1/9] platform: netlink: add devlink support Add support for Devlink, which is just another family of Generic Netlink like nl80211. Implement get_eswitch_mode and set_eswitch_mode to allow changing between legacy SRIOV and switchdev modes. Devlink's purpose is to allow querying and configuring stuff related to a piece of hardware but not to any of the usual Linux device classes. For example, nowadays the Smart NICs normally allow to change the eswitch mode per PF, because their hardware implements one eswitch per PF, but future models might have a single eswitch for all the physical and virtual ports of the NIC allowing more advanced bridge offloads. Regarding the above example, for the moment we only support PCI network devices with the "one eswitch per PF" model. The reason is that currently NM only knows about netdevs so dealing with "devlink devices" that doesn't map 1-1 with a netdev would require new mechanisms to understand what they are and their relation with the netdevs that NM manage. We will deal with that use cases when they arise and we have more information about the right way to support them. --- Makefile.am | 2 + src/libnm-platform/devlink/nm-devlink.c | 340 ++++++++++++++++++++++++ src/libnm-platform/devlink/nm-devlink.h | 22 ++ src/libnm-platform/meson.build | 1 + src/libnm-platform/nm-platform.c | 4 + src/libnm-platform/nm-platform.h | 1 + 6 files changed, 370 insertions(+) create mode 100644 src/libnm-platform/devlink/nm-devlink.c create mode 100644 src/libnm-platform/devlink/nm-devlink.h diff --git a/Makefile.am b/Makefile.am index 0a9367d479..d91e983e9c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -672,6 +672,8 @@ src_libnm_platform_libnm_platform_la_SOURCES = \ src/libnm-platform/nmp-object.h \ src/libnm-platform/nmp-plobj.c \ src/libnm-platform/nmp-plobj.h \ + src/libnm-platform/devlink/nm-devlink.c \ + src/libnm-platform/devlink/nm-devlink.h \ src/libnm-platform/wifi/nm-wifi-utils-nl80211.c \ src/libnm-platform/wifi/nm-wifi-utils-nl80211.h \ src/libnm-platform/wifi/nm-wifi-utils-private.h \ diff --git a/src/libnm-platform/devlink/nm-devlink.c b/src/libnm-platform/devlink/nm-devlink.c new file mode 100644 index 0000000000..dc68bafde5 --- /dev/null +++ b/src/libnm-platform/devlink/nm-devlink.c @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2024 Red Hat, Inc. + */ + +#include "libnm-glib-aux/nm-default-glib-i18n-lib.h" + +#include "nm-devlink.h" + +#include +#include + +#include "libnm-log-core/nm-logging.h" +#include "libnm-platform/nm-netlink.h" +#include "libnm-platform/nm-platform.h" +#include "libnm-platform/nm-platform-utils.h" + +#define _NMLOG_PREFIX_NAME "devlink" +#define _NMLOG_DOMAIN LOGD_PLATFORM | LOGD_DEVICE +#define _NMLOG(level, ...) \ + G_STMT_START \ + { \ + char _ifname_buf[IFNAMSIZ]; \ + const char *_ifname = self ? nmp_utils_if_indextoname(self->ifindex, _ifname_buf) : NULL; \ + \ + nm_log((level), \ + _NMLOG_DOMAIN, \ + _ifname ?: NULL, \ + NULL, \ + "%s%s%s%s: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \ + _NMLOG_PREFIX_NAME, \ + NM_PRINT_FMT_QUOTED(_ifname, " (", _ifname, ")", "") \ + _NM_UTILS_MACRO_REST(__VA_ARGS__)); \ + } \ + G_STMT_END + +#define CB_RESULT_PENDING 0 +#define CB_RESULT_OK 1 + +struct _NMDevlink { + NMPlatform *plat; + struct nl_sock *genl_sock_sync; + guint16 genl_family_id; + int ifindex; +}; + +/** + * nm_devlink_new: + * @platform: the #NMPlatform that will use this #NMDevlink instance + * @genl_sock_sync: the netlink socket (will be used synchronously) + * @ifindex: the kernel's netdev ifindex corresponding to the devlink device + * + * Create a new #NMDevlink instance to make devlink queries regarding a specific + * device. + * + * Returns: (transfer full): the allocated new #NMDevlink + */ +NMDevlink * +nm_devlink_new(NMPlatform *platform, struct nl_sock *genl_sock_sync, int ifindex) +{ + NMDevlink *self = g_new(NMDevlink, 1); + + self->plat = platform; + self->genl_sock_sync = genl_sock_sync; + self->genl_family_id = nm_platform_genl_get_family_id(platform, NMP_GENL_FAMILY_TYPE_DEVLINK); + self->ifindex = ifindex; + return self; +} + +/** + * nm_devlink_get_dev_identifier: + * @self: the #NMDevlink + * @out_bus: (out): the "bus_name" part of the devlink device identifier + * @out_addr: (out): the "bus_addr" part of the devlink device identifier + * @error: (optional): the error location + * + * Get the devlink device identifier of the device for which the #NMDevlink was + * created (with the @ifindex argument of nm_devlink_get_new()). A devlink device + * is identified as "bus_name/bus_addr" (i.e. "pci/0000:65:00.0"). This function + * provides both parts separately. + * + * Note that here we only get the potential devlink device identifier. The real devlink + * device might not even exist if the hw doesn't implement devlink or the netdev + * doesn't have a 1-1 corresponding devlink device (i.e. because it's a VF or + * because the hw uses a "one eswitch for many ports" model). + * + * Also note that currently only PCI devices are supported, an error will be + * returned for other kind of devices. + * + * Returns: FALSE in case of error, TRUE otherwise + */ +gboolean +nm_devlink_get_dev_identifier(NMDevlink *self, char **out_bus, char **out_addr, GError **error) +{ + const char *bus; + char sbuf[IFNAMSIZ]; + NMPUtilsEthtoolDriverInfo ethtool_driver_info; + + nm_assert(out_bus != NULL && out_addr != NULL); + nm_assert(!error || !*error); + + if (!nm_platform_link_get_udev_property(self->plat, self->ifindex, "ID_BUS", &bus)) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "Can't get udev info for device '%s'", + nmp_utils_if_indextoname(self->ifindex, sbuf)); + return FALSE; + } + + if (!nm_streq0(bus, "pci")) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "Devlink is only supported for PCI but device '%s' has bus name '%s'", + nmp_utils_if_indextoname(self->ifindex, sbuf), + bus); + return FALSE; + } + + if (!nmp_utils_ethtool_get_driver_info(self->ifindex, ðtool_driver_info)) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "Can't get ethtool driver info for device '%s'", + nmp_utils_if_indextoname(self->ifindex, sbuf)); + return FALSE; + } + + *out_bus = g_strdup("pci"); + *out_addr = g_strdup(ethtool_driver_info._private_bus_info); + return TRUE; +} + +static struct nl_msg * +devlink_alloc_msg(NMDevlink *self, uint8_t cmd, uint16_t flags) +{ + nm_auto_nlmsg struct nl_msg *msg = nlmsg_alloc(0); + if (!msg) + return NULL; + + genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, self->genl_family_id, 0, flags, cmd, 0); + return g_steal_pointer(&msg); +} + +static int +ack_cb_handler(const struct nl_msg *msg, void *data) +{ + int *result = data; + *result = CB_RESULT_OK; + return NL_STOP; +} + +static int +finish_cb_handler(const struct nl_msg *msg, void *data) +{ + int *result = data; + *result = CB_RESULT_OK; + return NL_SKIP; +} + +static int +err_cb_handler(const struct sockaddr_nl *nla, const struct nlmsgerr *err, void *data) +{ + void **args = data; + NMDevlink *self = args[0]; + int *result = args[1]; + char **err_msg = args[2]; + const char *extack_msg = NULL; + + *result = err->error; + nlmsg_parse_error(nlmsg_undata(err), &extack_msg); + + _LOGT("error response (%d) %s", err->error, extack_msg ?: nm_strerror(err->error)); + + if (err_msg) + *err_msg = g_strdup(extack_msg ?: nm_strerror(err->error)); + + return NL_SKIP; +} + +static int +devlink_send_and_recv(NMDevlink *self, + struct nl_msg *msg, + int (*valid_handler)(const struct nl_msg *, void *), + void *valid_data, + char **err_msg) +{ + int nle; + int cb_result = CB_RESULT_PENDING; + void *err_arg[] = {self, &cb_result, err_msg}; + const struct nl_cb cb = { + .err_cb = err_cb_handler, + .err_arg = err_arg, + .finish_cb = finish_cb_handler, + .finish_arg = &cb_result, + .ack_cb = ack_cb_handler, + .ack_arg = &cb_result, + .valid_cb = valid_handler, + .valid_arg = valid_data, + }; + + g_return_val_if_fail(msg != NULL, -ENOMEM); + + if (err_msg) + *err_msg = NULL; + + nle = nl_send_auto(self->genl_sock_sync, msg); + if (nle < 0) + goto out; + + while (cb_result == CB_RESULT_PENDING) { + nle = nl_recvmsgs(self->genl_sock_sync, &cb); + if (nle < 0 && nle != -EAGAIN) { + _LOGW("nl_recvmsgs() error: (%d) %s", nle, nm_strerror(nle)); + break; + } + } + +out: + if (nle < 0 && err_msg && *err_msg == NULL) + *err_msg = strdup(nm_strerror(nle)); + + if (nle >= 0 && cb_result < 0) + nle = cb_result; + return nle; +} + +static int +devlink_parse_eswitch_mode(const struct nl_msg *msg, void *data) +{ + static const struct nla_policy eswitch_policy[] = { + [DEVLINK_ATTR_ESWITCH_MODE] = {.type = NLA_U16}, + }; + enum devlink_eswitch_mode *eswitch_mode = data; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *tb[G_N_ELEMENTS(eswitch_policy)]; + + if (nla_parse_arr(tb, genlmsg_attrdata(gnlh, 0), genlmsg_attrlen(gnlh, 0), eswitch_policy) < 0) + return NL_SKIP; + + if (!tb[DEVLINK_ATTR_ESWITCH_MODE]) + return NL_SKIP; + + *eswitch_mode = nla_get_u16(tb[DEVLINK_ATTR_ESWITCH_MODE]); + return NL_OK; +} + +/* + * nm_devlink_get_eswitch_mode: + * @self: the #NMDevlink + * @error: the error location + * + * Get the eswitch mode of the device related to the #NMDevlink instance. Note + * that this might be unsupported by the device (see nm_devlink_get_dev()). + * + * Returns: the eswitch mode of the device, or <0 in case of error + */ +int +nm_devlink_get_eswitch_mode(NMDevlink *self, GError **error) +{ + nm_auto_nlmsg struct nl_msg *msg = NULL; + gs_free char *bus = NULL; + gs_free char *addr = NULL; + enum devlink_eswitch_mode eswitch_mode; + gs_free char *err_msg = NULL; + int rc; + + if (!nm_devlink_get_dev_identifier(self, &bus, &addr, error)) + return -1; + + msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_GET, 0); + NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, bus); + NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, addr); + + rc = devlink_send_and_recv(self, msg, devlink_parse_eswitch_mode, &eswitch_mode, &err_msg); + if (rc < 0) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "devlink: get-eswitch-mode: failed (%d) %s", + rc, + err_msg); + return rc; + } + + _LOGD("get-eswitch-mode: success"); + + return (int) eswitch_mode; + +nla_put_failure: + g_return_val_if_reached(-1); +} + +/* + * nm_devlink_set_eswitch_mode: + * @self: the #NMDevlink + * @mode: the eswitch mode to set + * @error: the error location + * + * Set the eswitch mode of the device related to the #NMDevlink instance. Note + * that this might be unsupported by the device (see nm_devlink_get_dev()). + * + * Returns: FALSE in case of error, TRUE otherwise + */ +gboolean +nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GError **error) +{ + nm_auto_nlmsg struct nl_msg *msg = NULL; + gs_free char *bus = NULL; + gs_free char *addr = NULL; + gs_free char *err_msg = NULL; + int rc; + + if (!nm_devlink_get_dev_identifier(self, &bus, &addr, error)) + return FALSE; + + msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_SET, 0); + NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, bus); + NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, addr); + NLA_PUT_U16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); + + rc = devlink_send_and_recv(self, msg, NULL, NULL, &err_msg); + if (rc < 0) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "devlink: set-eswitch-mode: failed (%d) %s", + rc, + err_msg); + return FALSE; + } + + _LOGD("set-eswitch-mode: success"); + + return TRUE; + +nla_put_failure: + g_return_val_if_reached(FALSE); +} diff --git a/src/libnm-platform/devlink/nm-devlink.h b/src/libnm-platform/devlink/nm-devlink.h new file mode 100644 index 0000000000..789b13d3ab --- /dev/null +++ b/src/libnm-platform/devlink/nm-devlink.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2024 Red Hat, Inc. + */ + +#ifndef __NMP_DEVLINK_H__ +#define __NMP_DEVLINK_H__ + +#include + +struct nl_sock; +typedef struct _NMPlatform NMPlatform; +typedef struct _NMDevlink NMDevlink; + +NMDevlink *nm_devlink_new(NMPlatform *platform, struct nl_sock *genl_sock_sync, int ifindex); +gboolean +nm_devlink_get_dev_identifier(NMDevlink *self, char **out_bus, char **out_addr, GError **error); +int nm_devlink_get_eswitch_mode(NMDevlink *self, GError **error); +gboolean +nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GError **error); + +#endif /* __NMP_DEVLINK_H__ */ \ No newline at end of file diff --git a/src/libnm-platform/meson.build b/src/libnm-platform/meson.build index 696ca1a6fa..7b6ad04266 100644 --- a/src/libnm-platform/meson.build +++ b/src/libnm-platform/meson.build @@ -12,6 +12,7 @@ libnm_platform = static_library( 'nmp-netns.c', 'nmp-object.c', 'nmp-plobj.c', + 'devlink/nm-devlink.c', 'wifi/nm-wifi-utils-nl80211.c', 'wifi/nm-wifi-utils.c', 'wpan/nm-wpan-utils.c', diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index 1411fe9e10..acd8675ceb 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -452,6 +452,10 @@ _nm_platform_kernel_support_init(NMPlatformKernelSupportType type, int value) /*****************************************************************************/ const NMPGenlFamilyInfo nmp_genl_family_infos[_NMP_GENL_FAMILY_TYPE_NUM] = { + [NMP_GENL_FAMILY_TYPE_DEVLINK] = + { + .name = "devlink", + }, [NMP_GENL_FAMILY_TYPE_ETHTOOL] = { .name = "ethtool", diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index a6e60bd400..cc8d524bf6 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -1084,6 +1084,7 @@ nm_platform_kernel_support_get(NMPlatformKernelSupportType type) } typedef enum { + NMP_GENL_FAMILY_TYPE_DEVLINK, NMP_GENL_FAMILY_TYPE_ETHTOOL, NMP_GENL_FAMILY_TYPE_MPTCP_PM, NMP_GENL_FAMILY_TYPE_NL80211, From c61c87f8a6b6e49a4912521aa91562f299f539a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 8 Feb 2024 09:45:37 +0100 Subject: [PATCH 2/9] sriov: add eswitch-mode property Add property to allow changing the eswitch mode between legacy SRIOV and switchdev. Allow also to set "preserve" to prevent NM from modifying the eswitch mode. --- src/libnm-client-impl/libnm.ver | 2 + ...gen-metadata-nm-settings-libnm-core.xml.in | 4 ++ src/libnm-core-impl/nm-setting-sriov.c | 47 ++++++++++++++++++- src/libnm-core-public/nm-setting-sriov.h | 17 +++++++ src/libnmc-setting/nm-meta-setting-desc.c | 3 ++ src/libnmc-setting/settings-docs.h.in | 1 + .../gen-metadata-nm-settings-nmcli.xml.in | 4 ++ 7 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/libnm-client-impl/libnm.ver b/src/libnm-client-impl/libnm.ver index 052e701854..17fc25c6c2 100644 --- a/src/libnm-client-impl/libnm.ver +++ b/src/libnm-client-impl/libnm.ver @@ -1967,4 +1967,6 @@ global: nm_setting_hsr_new; nm_setting_ip_config_get_dhcp_dscp; nm_setting_get_enum_property_type; + nm_setting_sriov_get_eswitch_mode; + nm_sriov_eswitch_mode_get_type; } libnm_1_44_0; diff --git a/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in b/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in index 033639e063..f3fa8e162d 100644 --- a/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in +++ b/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in @@ -2198,6 +2198,10 @@ dbus-type="i" gprop-type="NMTernary" /> + autoprobe_drivers; } +/** + * nm_setting_sriov_get_eswitch_mode: + * @setting: the #NMSettingSriov + * + * Returns: the value contained in the #NMSettingSriov:eswitch-mode property. + * + * Since: 1.46 + */ +NMSriovEswitchMode +nm_setting_sriov_get_eswitch_mode(NMSettingSriov *setting) +{ + g_return_val_if_fail(NM_IS_SETTING_SRIOV(setting), NM_SRIOV_ESWITCH_MODE_PRESERVE); + + return setting->eswitch_mode; +} + static int vf_index_compare(gconstpointer a, gconstpointer b) { @@ -1331,6 +1353,29 @@ nm_setting_sriov_class_init(NMSettingSriovClass *klass) NMSettingSriov, autoprobe_drivers); + /** + * NMSettingSriov:eswitch-mode + * + * Select the eswitch mode of the device. Currently it's only supported for + * PCI PF devices, and only if the eswitch device is managed from the same + * PCI address than the PF. + * + * If set to %NM_SRIOV_ESWITCH_MODE_PRESERVE (default) the eswitch mode won't be + * modified by NetworkManager. + * + * Since: 1.46 + */ + _nm_setting_property_define_direct_enum(properties_override, + obj_properties, + NM_SETTING_SRIOV_ESWITCH_MODE, + PROP_ESWITCH_MODE, + NM_TYPE_SRIOV_ESWITCH_MODE, + NM_SRIOV_ESWITCH_MODE_PRESERVE, + NM_SETTING_PARAM_FUZZY_IGNORE, + NULL, + NMSettingSriov, + eswitch_mode); + g_object_class_install_properties(object_class, _PROPERTY_ENUMS_LAST, obj_properties); _nm_setting_class_commit(setting_class, diff --git a/src/libnm-core-public/nm-setting-sriov.h b/src/libnm-core-public/nm-setting-sriov.h index 071b983775..16e493258f 100644 --- a/src/libnm-core-public/nm-setting-sriov.h +++ b/src/libnm-core-public/nm-setting-sriov.h @@ -29,6 +29,7 @@ G_BEGIN_DECLS #define NM_SETTING_SRIOV_TOTAL_VFS "total-vfs" #define NM_SETTING_SRIOV_VFS "vfs" #define NM_SETTING_SRIOV_AUTOPROBE_DRIVERS "autoprobe-drivers" +#define NM_SETTING_SRIOV_ESWITCH_MODE "eswitch-mode" #define NM_SRIOV_VF_ATTRIBUTE_MAC "mac" #define NM_SRIOV_VF_ATTRIBUTE_SPOOF_CHECK "spoof-check" @@ -53,6 +54,20 @@ typedef enum { NM_SRIOV_VF_VLAN_PROTOCOL_802_1AD = 1, } NMSriovVFVlanProtocol; +/** + * NMSriovEswitchMode: + * @NM_SRIOV_ESWITCH_MODE_PRESERVE: don't modify current eswitch mode + * @NM_SRIOV_ESWITCH_MODE_LEGACY: use legacy SRIOV + * @NM_SRIOV_ESWITCH_MODE_SWITCHDEV: use switchdev mode + * + * Since: 1.46 + */ +typedef enum { + NM_SRIOV_ESWITCH_MODE_PRESERVE = -1, + NM_SRIOV_ESWITCH_MODE_LEGACY = 0, + NM_SRIOV_ESWITCH_MODE_SWITCHDEV = 1, +} NMSriovEswitchMode; + NM_AVAILABLE_IN_1_14 GType nm_setting_sriov_get_type(void); NM_AVAILABLE_IN_1_14 @@ -73,6 +88,8 @@ NM_AVAILABLE_IN_1_14 void nm_setting_sriov_clear_vfs(NMSettingSriov *setting); NM_AVAILABLE_IN_1_14 NMTernary nm_setting_sriov_get_autoprobe_drivers(NMSettingSriov *setting); +NM_AVAILABLE_IN_1_46 +NMSriovEswitchMode nm_setting_sriov_get_eswitch_mode(NMSettingSriov *setting); NM_AVAILABLE_IN_1_14 gboolean nm_sriov_vf_add_vlan(NMSriovVF *vf, guint vlan_id); diff --git a/src/libnmc-setting/nm-meta-setting-desc.c b/src/libnmc-setting/nm-meta-setting-desc.c index 1e8f8809a3..8a53af36e2 100644 --- a/src/libnmc-setting/nm-meta-setting-desc.c +++ b/src/libnmc-setting/nm-meta-setting-desc.c @@ -7363,6 +7363,9 @@ static const NMMetaPropertyInfo *const property_infos_SRIOV[] = { PROPERTY_INFO_WITH_DESC (NM_SETTING_SRIOV_AUTOPROBE_DRIVERS, .property_type = &_pt_gobject_ternary, ), + PROPERTY_INFO_WITH_DESC (NM_SETTING_SRIOV_ESWITCH_MODE, + .property_type = &_pt_gobject_enum, + ), NULL }; diff --git a/src/libnmc-setting/settings-docs.h.in b/src/libnmc-setting/settings-docs.h.in index e093036dc1..7e88de3a71 100644 --- a/src/libnmc-setting/settings-docs.h.in +++ b/src/libnmc-setting/settings-docs.h.in @@ -310,6 +310,7 @@ #define DESCRIBE_DOC_NM_SETTING_SERIAL_SEND_DELAY N_("Time to delay between each byte sent to the modem, in microseconds.") #define DESCRIBE_DOC_NM_SETTING_SERIAL_STOPBITS N_("Number of stop bits for communication on the serial port. Either 1 or 2. The 1 in \"8n1\" for example.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_AUTOPROBE_DRIVERS N_("Whether to autoprobe virtual functions by a compatible driver. If set to \"true\" (1), the kernel will try to bind VFs to a compatible driver and if this succeeds a new network interface will be instantiated for each VF. If set to \"false\" (0), VFs will not be claimed and no network interfaces will be created for them. When set to \"default\" (-1), the global default is used; in case the global default is unspecified it is assumed to be \"true\" (1).") +#define DESCRIBE_DOC_NM_SETTING_SRIOV_ESWITCH_MODE N_("Select the eswitch mode of the device. Currently it's only supported for PCI PF devices, and only if the eswitch device is managed from the same PCI address than the PF. If set to \"preserve\" (-1) (default) the eswitch mode won't be modified by NetworkManager.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_TOTAL_VFS N_("The total number of virtual functions to create. Note that when the sriov setting is present NetworkManager enforces the number of virtual functions on the interface (also when it is zero) during activation and resets it upon deactivation. To prevent any changes to SR-IOV parameters don't add a sriov setting to the connection.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_VFS N_("Array of virtual function descriptors. Each VF descriptor is a dictionary mapping attribute names to GVariant values. The 'index' entry is mandatory for each VF. When represented as string a VF is in the form: \"INDEX [ATTR=VALUE[ ATTR=VALUE]...]\". for example: \"2 mac=00:11:22:33:44:55 spoof-check=true\". Multiple VFs can be specified using a comma as separator. Currently, the following attributes are supported: mac, spoof-check, trust, min-tx-rate, max-tx-rate, vlans. The \"vlans\" attribute is represented as a semicolon-separated list of VLAN descriptors, where each descriptor has the form \"ID[.PRIORITY[.PROTO]]\". PROTO can be either 'q' for 802.1Q (the default) or 'ad' for 802.1ad.") #define DESCRIBE_DOC_NM_SETTING_TC_CONFIG_QDISCS N_("Array of TC queueing disciplines. When the \"tc\" setting is present, qdiscs from this property are applied upon activation. If the property is empty, all qdiscs are removed and the device will only have the default qdisc assigned by kernel according to the \"net.core.default_qdisc\" sysctl. If the \"tc\" setting is not present, NetworkManager doesn't touch the qdiscs present on the interface.") diff --git a/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in b/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in index 9c82ea12b7..177b1a5e78 100644 --- a/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in +++ b/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in @@ -1820,6 +1820,10 @@ nmcli-description="Whether to autoprobe virtual functions by a compatible driver. If set to "true" (1), the kernel will try to bind VFs to a compatible driver and if this succeeds a new network interface will be instantiated for each VF. If set to "false" (0), VFs will not be claimed and no network interfaces will be created for them. When set to "default" (-1), the global default is used; in case the global default is unspecified it is assumed to be "true" (1)." format="ternary" values="true/yes/on, false/no/off, default/unknown" /> + Date: Thu, 8 Feb 2024 09:46:47 +0100 Subject: [PATCH 3/9] sriov: set the devlink's eswitch mode Use the new property sriov.eswitch-mode to select between legacy SRIOV and switchdev mode. --- src/core/devices/nm-device.c | 20 ++++++++++++++++-- src/libnm-base/nm-base.h | 7 +++++++ src/libnm-platform/nm-linux-platform.c | 28 ++++++++++++++++++++++++-- src/libnm-platform/nm-platform.c | 2 ++ src/libnm-platform/nm-platform.h | 2 ++ 5 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c index 35011a3db9..07a763429e 100644 --- a/src/core/devices/nm-device.c +++ b/src/core/devices/nm-device.c @@ -139,6 +139,7 @@ typedef struct { gpointer callback_data; guint num_vfs; NMOptionBool autoprobe; + NMSriovEswitchMode eswitch_mode; } SriovOp; typedef enum { @@ -7708,6 +7709,7 @@ sriov_op_start(NMDevice *self, SriovOp *op) priv->ifindex, op->num_vfs, op->autoprobe, + (_NMSriovEswitchMode) op->eswitch_mode, sriov_op_cb, op, op->cancellable); @@ -7771,6 +7773,7 @@ static void sriov_op_queue(NMDevice *self, guint num_vfs, NMOptionBool autoprobe, + NMSriovEswitchMode eswitch_mode, NMPlatformAsyncCallback callback, gpointer callback_data) { @@ -7799,6 +7802,7 @@ sriov_op_queue(NMDevice *self, *op = (SriovOp){ .num_vfs = num_vfs, .autoprobe = autoprobe, + .eswitch_mode = eswitch_mode, .callback = callback, .callback_data = callback_data, }; @@ -7823,7 +7827,12 @@ device_init_static_sriov_num_vfs(NMDevice *self) -1, -1); if (num_vfs >= 0) - sriov_op_queue(self, num_vfs, NM_OPTION_BOOL_DEFAULT, NULL, NULL); + sriov_op_queue(self, + num_vfs, + NM_OPTION_BOOL_DEFAULT, + NM_SRIOV_ESWITCH_MODE_PRESERVE, + NULL, + NULL); } } @@ -10004,6 +10013,7 @@ activate_stage1_device_prepare(NMDevice *self) sriov_op_queue(self, nm_setting_sriov_get_total_vfs(s_sriov), NM_TERNARY_TO_OPTION_BOOL(autoprobe), + nm_setting_sriov_get_eswitch_mode(s_sriov), sriov_params_cb, nm_utils_user_data_pack(self, g_steal_pointer(&plat_vfs))); priv->stage1_sriov_state = NM_DEVICE_STAGE_STATE_PENDING; @@ -16720,6 +16730,7 @@ _set_state_full(NMDevice *self, NMDeviceState state, NMDeviceStateReason reason, sriov_op_queue(self, 0, NM_OPTION_BOOL_TRUE, + NM_SRIOV_ESWITCH_MODE_PRESERVE, sriov_reset_on_deactivate_cb, nm_utils_user_data_pack(self, GINT_TO_POINTER(reason))); } @@ -16769,7 +16780,12 @@ _set_state_full(NMDevice *self, NMDeviceState state, NMDeviceStateReason reason, if (priv->ifindex > 0 && (s_sriov = nm_device_get_applied_setting(self, NM_TYPE_SETTING_SRIOV))) { priv->sriov_reset_pending++; - sriov_op_queue(self, 0, NM_OPTION_BOOL_TRUE, sriov_reset_on_failure_cb, self); + sriov_op_queue(self, + 0, + NM_OPTION_BOOL_TRUE, + NM_SRIOV_ESWITCH_MODE_PRESERVE, + sriov_reset_on_failure_cb, + self); break; } /* Schedule the transition to DISCONNECTED. The device can't transition diff --git a/src/libnm-base/nm-base.h b/src/libnm-base/nm-base.h index 34944408ca..c313bccf6e 100644 --- a/src/libnm-base/nm-base.h +++ b/src/libnm-base/nm-base.h @@ -277,6 +277,13 @@ typedef enum { | _NM_VLAN_FLAG_LOOSE_BINDING | _NM_VLAN_FLAG_MVRP, } _NMVlanFlags; +typedef enum { + /* Mirrors libnm's NMSriovEswitchMode */ + _NM_SRIOV_ESWITCH_MODE_PRESERVE = -1, + _NM_SRIOV_ESWITCH_MODE_LEGACY = 0, + _NM_SRIOV_ESWITCH_MODE_SWITCHDEV = 1, +} _NMSriovEswitchMode; + /*****************************************************************************/ typedef enum { diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index a70782808c..28eafc837e 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -41,6 +41,7 @@ #include "libnm-platform/nm-netlink.h" #include "libnm-platform/nm-platform-utils.h" #include "libnm-platform/nmp-netns.h" +#include "libnm-platform/devlink/nm-devlink.h" #include "libnm-platform/wifi/nm-wifi-utils-wext.h" #include "libnm-platform/wifi/nm-wifi-utils.h" #include "libnm-platform/wpan/nm-wpan-utils.h" @@ -8900,10 +8901,12 @@ link_set_sriov_params_async(NMPlatform *platform, int ifindex, guint num_vfs, NMOptionBool autoprobe, + _NMSriovEswitchMode eswitch_mode, NMPlatformAsyncCallback callback, gpointer data, GCancellable *cancellable) { + NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); nm_auto_pop_netns NMPNetns *netns = NULL; gs_free_error GError *error = NULL; nm_auto_close int dirfd = -1; @@ -8914,6 +8917,8 @@ link_set_sriov_params_async(NMPlatform *platform, gpointer packed; const char *values[3]; char buf[64]; + gs_free NMDevlink *devlink = NULL; + int current_eswitch_mode; g_return_if_fail(callback || !data); g_return_if_fail(cancellable); @@ -8926,6 +8931,8 @@ link_set_sriov_params_async(NMPlatform *platform, goto out_idle; } + devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex); + dirfd = nm_platform_sysctl_open_netdir(platform, ifindex, ifname); if (!dirfd) { g_set_error_literal(&error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, "couldn't open netdir"); @@ -8957,6 +8964,7 @@ link_set_sriov_params_async(NMPlatform *platform, 0, G_MAXUINT, -1); + current_autoprobe = nm_platform_sysctl_get_int_checked( platform, NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"), @@ -8964,15 +8972,26 @@ link_set_sriov_params_async(NMPlatform *platform, 0, 1, -1); - if (current_autoprobe == -1 && errno == ENOENT) { /* older kernel versions don't have this sysctl. Assume the value is * "1". */ current_autoprobe = 1; } + current_eswitch_mode = nm_devlink_get_eswitch_mode(devlink, &error); + if (current_eswitch_mode < 0) { + /* We can proceed if eswith-mode is "preserve", otherwise propagate the error */ + if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) + goto out_idle; + + _LOGD("%s", error->message); + g_clear_error(&error); + } + if (current_num == num_vfs - && (autoprobe == NM_OPTION_BOOL_DEFAULT || current_autoprobe == autoprobe)) + && (autoprobe == NM_OPTION_BOOL_DEFAULT || current_autoprobe == autoprobe) + && (eswitch_mode == _NM_SRIOV_ESWITCH_MODE_PRESERVE + || current_eswitch_mode == eswitch_mode)) goto out_idle; if (NM_IN_SET(autoprobe, NM_OPTION_BOOL_TRUE, NM_OPTION_BOOL_FALSE) @@ -8990,6 +9009,11 @@ link_set_sriov_params_async(NMPlatform *platform, goto out_idle; } + if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && current_eswitch_mode != eswitch_mode) { + if (!nm_devlink_set_eswitch_mode(devlink, (enum devlink_eswitch_mode) eswitch_mode, &error)) + goto out_idle; + } + if (current_num == 0 && num_vfs == 0) goto out_idle; diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index acd8675ceb..7fb2a2e2e7 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -2024,6 +2024,7 @@ nm_platform_link_set_sriov_params_async(NMPlatform *self, int ifindex, guint num_vfs, NMOptionBool autoprobe, + _NMSriovEswitchMode eswitch_mode, NMPlatformAsyncCallback callback, gpointer callback_data, GCancellable *cancellable) @@ -2037,6 +2038,7 @@ nm_platform_link_set_sriov_params_async(NMPlatform *self, ifindex, num_vfs, autoprobe, + eswitch_mode, callback, callback_data, cancellable); diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index cc8d524bf6..8aba0e5c11 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -1174,6 +1174,7 @@ typedef struct { int ifindex, guint num_vfs, NMOptionBool autoprobe, + _NMSriovEswitchMode eswitch_mode, NMPlatformAsyncCallback callback, gpointer callback_data, GCancellable *cancellable); @@ -2037,6 +2038,7 @@ void nm_platform_link_set_sriov_params_async(NMPlatform *self, int ifindex, guint num_vfs, NMOptionBool autoprobe, + _NMSriovEswitchMode eswitch_mode, NMPlatformAsyncCallback callback, gpointer callback_data, GCancellable *cancellable); From 770340627ba78496843d731f971562c396fe2d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Fri, 26 Jan 2024 17:42:04 +0100 Subject: [PATCH 4/9] platform: destroy VFs before changing the eswitch mode It is not safe to change the eswitch mode when there are VFs already created: it often fails, or even worse, doesn't fail immediatelly but there are later problems with the VFs. What is supposed to be well tested in all drivers is to change the eswitch mode with no VFs created, and then create the VFs, so let's set num_vfs=0 before changing the eswitch mode. As we want to change num_vfs asynchronously in a separate thread, we need to do a multi-step process with callbacks each time that a step finish (before it was just set num_vfs asynchronously and invoke the callback when it's done). This makes link_set_sriov_params_async to become even larger and more complex than it already was. Refactor it to make it cleaner and easier to follow, and hopefully less error prone, and implement that multi-step process. --- src/libnm-platform/nm-linux-platform.c | 425 ++++++++++++++++++------- 1 file changed, 311 insertions(+), 114 deletions(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 28eafc837e..1e92f60309 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -8882,20 +8882,240 @@ nla_put_failure: g_return_val_if_reached(FALSE); } -static void -sriov_idle_cb(gpointer user_data, GCancellable *cancellable) +static gint64 +sriov_read_sysctl_uint(NMPlatform *platform, + int dirfd, + const char *ifname, + const char *dev_file, + GError **error) { - gs_unref_object NMPlatform *platform = NULL; - gs_free_error GError *cancelled_error = NULL; - gs_free_error GError *error = NULL; - NMPlatformAsyncCallback callback; - gpointer callback_data; + const char *path; + gint64 val; + + nm_assert(NM_STRLEN("device/%s") + strlen(dev_file)); + + path = nm_sprintf_bufa(256, "device/%s", dev_file); + val = nm_platform_sysctl_get_int_checked(platform, + NMP_SYSCTL_PATHID_NETDIR_UNSAFE_A(dirfd, ifname, path), + 10, + 0, + G_MAXUINT, + -1); + + if (val < 0) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "couldn't read %s: %s", + dev_file, + nm_strerror_native(errno)); + return -errno; + } + + return val; +} + +static gboolean +sriov_set_autoprobe(NMPlatform *platform, + int dirfd, + const char *ifname, + NMOptionBool autoprobe, + GError **error) +{ + int current_autoprobe = + (int) sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_drivers_autoprobe", error); + + if (current_autoprobe == -ENOENT) { + /* older kernel versions don't have this sysctl. Assume the value is "1". */ + current_autoprobe = 1; + g_clear_error(error); + } + + if (current_autoprobe < 0) + return FALSE; + + if (autoprobe != NM_OPTION_BOOL_DEFAULT && current_autoprobe != autoprobe) { + if (!nm_platform_sysctl_set( + platform, + NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"), + autoprobe == 1 ? "1" : "0")) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "couldn't set SR-IOV drivers-autoprobe to %d: %s", + (int) autoprobe, + nm_strerror_native(errno)); + return FALSE; + } + } + + return TRUE; +} + +#define _SRIOV_ASYNC_MAX_STEPS 4 + +typedef struct _SriovAsyncState { + NMPlatform *platform; + int ifindex; + guint num_vfs; + _NMSriovEswitchMode eswitch_mode; + void (*steps[_SRIOV_ASYNC_MAX_STEPS])(struct _SriovAsyncState *); + int current_step; + NMPlatformAsyncCallback callback; + gpointer data; + GCancellable *cancellable; +} SriovAsyncState; + +static void +sriov_async_invoke_callback(gpointer user_data, GCancellable *cancellable) +{ + gs_free_error GError *cancelled_error = NULL; + gs_free_error GError *error = NULL; + NMPlatformAsyncCallback callback; + gpointer callback_data; g_cancellable_set_error_if_cancelled(cancellable, &cancelled_error); - nm_utils_user_data_unpack(user_data, &platform, &error, &callback, &callback_data); + nm_utils_user_data_unpack(user_data, &error, &callback, &callback_data); callback(cancelled_error ?: error, callback_data); } +static void +sriov_async_finish_err(SriovAsyncState *async_state, GError *error) +{ + NMPlatform *platform = async_state->platform; + + _LOGD("finished configuring SR-IOV, error: %s", error ? error->message : "none"); + + if (async_state->callback) { + /* nm_platform_link_set_sriov_params() promises to always call the callback, + * and always asynchronously. We might have reached here without doing + * any asynchronous task, so invoke the user's callback in the idle task + * to make it asynchronous. Actually, let's make it simple and do it + * always in this way, even if asynchronous tasks were made. + */ + gpointer packed = nm_utils_user_data_pack(g_steal_pointer(&error), + async_state->callback, + async_state->data); + nm_utils_invoke_on_idle(async_state->cancellable, sriov_async_invoke_callback, packed); + } + + g_object_unref(async_state->platform); + g_object_unref(async_state->cancellable); + g_free(async_state); + g_free(error); +} + +static void +sriov_async_call_next_step(SriovAsyncState *async_state) +{ + if (g_cancellable_is_cancelled(async_state->cancellable)) { + sriov_async_finish_err(async_state, NULL); /* The error will be set later */ + return; + } + + async_state->current_step++; + + nm_assert(async_state->current_step >= 0); + nm_assert(async_state->current_step < _SRIOV_ASYNC_MAX_STEPS); + nm_assert(async_state->steps[async_state->current_step] != NULL); + + async_state->steps[async_state->current_step](async_state); +} + +static void +sriov_async_sysctl_done_cb(GError *error, gpointer data) +{ + SriovAsyncState *async_state = data; + + if (error) + sriov_async_finish_err(async_state, g_error_copy(error)); + else + sriov_async_call_next_step(async_state); +} + +static void +sriov_async_set_num_vfs(SriovAsyncState *async_state, const char *val) +{ + NMPlatform *platform = async_state->platform; + const char *values[] = {val, NULL}; + nm_auto_close int dirfd = -1; + char ifname[IFNAMSIZ]; + gs_free_error GError *error = NULL; + + dirfd = nm_platform_sysctl_open_netdir(platform, async_state->ifindex, ifname); + if (!dirfd) { + g_set_error(&error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "couldn't open netdir for device with ifindex %d", + async_state->ifindex); + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; + } + + sysctl_set_async(platform, + NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_numvfs"), + values, + sriov_async_sysctl_done_cb, + async_state, + async_state->cancellable); +} + +static void +sriov_async_step1_destroy_vfs(SriovAsyncState *async_state) +{ + NMPlatform *platform = async_state->platform; + + _LOGD("destroying VFs before configuring SR-IOV"); + + sriov_async_set_num_vfs(async_state, "0"); +} + +static void +sriov_async_step2_set_eswitch_mode(SriovAsyncState *async_state) +{ + NMPlatform *platform = async_state->platform; + NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); + enum devlink_eswitch_mode eswitch_mode = (enum devlink_eswitch_mode) async_state->eswitch_mode; + gs_free NMDevlink *devlink = NULL; + gs_free_error GError *error = NULL; + + _LOGD("setting eswitch-mode to '%d'", (int) eswitch_mode); + + /* We set eswitch mode as a sriov_async step because it's in the middle of + * other steps that are async. However, this step itself is synchronous. */ + devlink = nm_devlink_new(platform, priv->sk_genl_sync, async_state->ifindex); + if (!nm_devlink_set_eswitch_mode(devlink, eswitch_mode, &error)) { + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; + } + + sriov_async_call_next_step(async_state); +} + +static void +sriov_async_step3_create_vfs(SriovAsyncState *async_state) +{ + NMPlatform *platform = async_state->platform; + const char *val = nm_sprintf_bufa(32, "%u", async_state->num_vfs); + + _LOGD("setting sriov_numvfs to %u", async_state->num_vfs); + + sriov_async_set_num_vfs(async_state, val); +} + +static void +sriov_async_step_finish_ok(SriovAsyncState *async_state) +{ + sriov_async_finish_err(async_state, NULL); +} + +/* + * Take special care when setting new values: + * - don't touch anything if the right values are already set + * - to change the number of VFs, eswitch mode or autoprobe we need to destroy existing VFs + * - the autoprobe setting is irrelevant when numvfs is zero + */ static void link_set_sriov_params_async(NMPlatform *platform, int ifindex, @@ -8906,140 +9126,117 @@ link_set_sriov_params_async(NMPlatform *platform, gpointer data, GCancellable *cancellable) { + SriovAsyncState *async_state; NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); nm_auto_pop_netns NMPNetns *netns = NULL; gs_free_error GError *error = NULL; nm_auto_close int dirfd = -1; - int current_autoprobe; - guint i, total; - gint64 current_num; char ifname[IFNAMSIZ]; - gpointer packed; - const char *values[3]; - char buf[64]; - gs_free NMDevlink *devlink = NULL; - int current_eswitch_mode; + int max_vfs; + int current_num_vfs; + int current_eswitch_mode = _NM_SRIOV_ESWITCH_MODE_PRESERVE; + gboolean need_change_eswitch_mode; + gboolean need_change_vfs; + gboolean need_destroy_vfs; + gboolean need_create_vfs; + int i; g_return_if_fail(callback || !data); g_return_if_fail(cancellable); + async_state = g_new0(SriovAsyncState, 1); + async_state->platform = g_object_ref(platform); + async_state->ifindex = ifindex; + async_state->eswitch_mode = eswitch_mode; + async_state->current_step = -1; + async_state->callback = callback; + async_state->data = data; + async_state->cancellable = g_object_ref(cancellable); + if (!nm_platform_netns_push(platform, &netns)) { g_set_error_literal(&error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, - "couldn't change namespace"); - goto out_idle; + "couldn't change network namespace"); + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; } - devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex); - dirfd = nm_platform_sysctl_open_netdir(platform, ifindex, ifname); if (!dirfd) { - g_set_error_literal(&error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, "couldn't open netdir"); - goto out_idle; - } - - total = nm_platform_sysctl_get_int_checked( - platform, - NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_totalvfs"), - 10, - 0, - G_MAXUINT, - 0); - if (!errno && num_vfs > total) { - _LOGW("link: %d only supports %u VFs (requested %u)", ifindex, total, num_vfs); - num_vfs = total; - } - - /* - * Take special care when setting new values: - * - don't touch anything if the right values are already set - * - to change the number of VFs or autoprobe we need to destroy existing VFs - * - the autoprobe setting is irrelevant when numvfs is zero - */ - current_num = nm_platform_sysctl_get_int_checked( - platform, - NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_numvfs"), - 10, - 0, - G_MAXUINT, - -1); - - current_autoprobe = nm_platform_sysctl_get_int_checked( - platform, - NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"), - 10, - 0, - 1, - -1); - if (current_autoprobe == -1 && errno == ENOENT) { - /* older kernel versions don't have this sysctl. Assume the value is - * "1". */ - current_autoprobe = 1; - } - - current_eswitch_mode = nm_devlink_get_eswitch_mode(devlink, &error); - if (current_eswitch_mode < 0) { - /* We can proceed if eswith-mode is "preserve", otherwise propagate the error */ - if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) - goto out_idle; - - _LOGD("%s", error->message); - g_clear_error(&error); - } - - if (current_num == num_vfs - && (autoprobe == NM_OPTION_BOOL_DEFAULT || current_autoprobe == autoprobe) - && (eswitch_mode == _NM_SRIOV_ESWITCH_MODE_PRESERVE - || current_eswitch_mode == eswitch_mode)) - goto out_idle; - - if (NM_IN_SET(autoprobe, NM_OPTION_BOOL_TRUE, NM_OPTION_BOOL_FALSE) - && current_autoprobe != autoprobe - && !nm_platform_sysctl_set( - platform, - NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"), - nm_sprintf_buf(buf, "%d", (int) autoprobe))) { g_set_error(&error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, - "couldn't set SR-IOV drivers-autoprobe to %d: %s", - (int) autoprobe, - nm_strerror_native(errno)); - goto out_idle; + "couldn't open netdir for device with ifindex %d", + ifindex); + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; } - if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && current_eswitch_mode != eswitch_mode) { - if (!nm_devlink_set_eswitch_mode(devlink, (enum devlink_eswitch_mode) eswitch_mode, &error)) - goto out_idle; + current_num_vfs = sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_numvfs", &error); + if (current_num_vfs < 0) { + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; } - if (current_num == 0 && num_vfs == 0) - goto out_idle; + max_vfs = sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_totalvfs", &error); + if (max_vfs < 0) { + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; + } + + if (num_vfs > max_vfs) { + _LOGW("link: device %d only supports %u VFs (requested %u)", ifindex, max_vfs, num_vfs); + _LOGW("link: reducing num_vfs to %u for device %d", max_vfs, ifindex); + num_vfs = max_vfs; + } + + async_state->num_vfs = num_vfs; + + /* Setting autoprobe goes first, we can do it synchronously */ + if (num_vfs > 0 && !sriov_set_autoprobe(platform, dirfd, ifname, autoprobe, &error)) { + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; + } + + if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) { + gs_free NMDevlink *devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex); + + current_eswitch_mode = nm_devlink_get_eswitch_mode(devlink, &error); + if (current_eswitch_mode < 0) { + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; + } + } + + /* Decide what actions we must do. Note that we might need to destroy the VFs even + * if num_vfs == current_num_vfs, for example to change the eswitch mode. Because of + * that, we might need to create VFs even if num_vfs == current_num_vfs. + * Steps in order (unnecessary steps are skipped): + * 1. Destroy VFs + * 2. Set eswitch mode + * 3. Create VFs + * 4. Invoke caller's callback + */ + need_change_eswitch_mode = + eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && current_eswitch_mode != eswitch_mode; + need_change_vfs = num_vfs != current_num_vfs; + need_destroy_vfs = current_num_vfs > 0 && (need_change_eswitch_mode || need_change_vfs); + need_create_vfs = (current_num_vfs == 0 || need_destroy_vfs) && num_vfs > 0; i = 0; - if (current_num != 0) - values[i++] = "0"; - if (num_vfs != 0) - values[i++] = nm_sprintf_bufa(32, "%u", num_vfs); - values[i++] = NULL; + if (need_destroy_vfs) + async_state->steps[i++] = sriov_async_step1_destroy_vfs; + if (need_change_eswitch_mode) + async_state->steps[i++] = sriov_async_step2_set_eswitch_mode; + if (need_create_vfs) + async_state->steps[i++] = sriov_async_step3_create_vfs; - sysctl_set_async(platform, - NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_numvfs"), - values, - callback, - data, - cancellable); - return; + nm_assert(i < _SRIOV_ASYNC_MAX_STEPS); -out_idle: - if (callback) { - packed = nm_utils_user_data_pack(g_object_ref(platform), - g_steal_pointer(&error), - callback, - data); - nm_utils_invoke_on_idle(cancellable, sriov_idle_cb, packed); - } + async_state->steps[i] = sriov_async_step_finish_ok; + + sriov_async_call_next_step(async_state); } static gboolean From 8a88386e3a1967eed3b36110c1108b7f4c97b98d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 8 Feb 2024 12:46:27 +0100 Subject: [PATCH 5/9] sriov: add eswitch-inline-mode and eswitch-encap-mode properties Those are related to the eswitch mode and can be configured together. --- src/libnm-client-impl/libnm.ver | 4 + ...gen-metadata-nm-settings-libnm-core.xml.in | 8 ++ src/libnm-core-impl/nm-setting-sriov.c | 88 ++++++++++++++++++- src/libnm-core-public/nm-setting-sriov.h | 46 +++++++++- src/libnmc-setting/nm-meta-setting-desc.c | 6 ++ src/libnmc-setting/settings-docs.h.in | 2 + .../gen-metadata-nm-settings-nmcli.xml.in | 8 ++ 7 files changed, 157 insertions(+), 5 deletions(-) diff --git a/src/libnm-client-impl/libnm.ver b/src/libnm-client-impl/libnm.ver index 17fc25c6c2..73fdec5bad 100644 --- a/src/libnm-client-impl/libnm.ver +++ b/src/libnm-client-impl/libnm.ver @@ -1969,4 +1969,8 @@ global: nm_setting_get_enum_property_type; nm_setting_sriov_get_eswitch_mode; nm_sriov_eswitch_mode_get_type; + nm_setting_sriov_get_eswitch_inline_mode; + nm_sriov_eswitch_inline_mode_get_type; + nm_setting_sriov_get_eswitch_encap_mode; + nm_sriov_eswitch_encap_mode_get_type; } libnm_1_44_0; diff --git a/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in b/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in index f3fa8e162d..b2748beb1e 100644 --- a/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in +++ b/src/libnm-core-impl/gen-metadata-nm-settings-libnm-core.xml.in @@ -2198,6 +2198,14 @@ dbus-type="i" gprop-type="NMTernary" /> + + eswitch_mode; } +/** + * nm_setting_sriov_get_eswitch_inline_mode: + * @setting: the #NMSettingSriov + * + * Returns: the value contained in the #NMSettingSriov:eswitch-inline-mode property. + * + * Since: 1.46 + */ +NMSriovEswitchInlineMode +nm_setting_sriov_get_eswitch_inline_mode(NMSettingSriov *setting) +{ + g_return_val_if_fail(NM_IS_SETTING_SRIOV(setting), NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE); + + return setting->eswitch_inline_mode; +} + +/** + * nm_setting_sriov_get_eswitch_encap_mode: + * @setting: the #NMSettingSriov + * + * Returns: the value contained in the #NMSettingSriov:eswitch-encap-mode property. + * + * Since: 1.46 + */ +NMSriovEswitchEncapMode +nm_setting_sriov_get_eswitch_encap_mode(NMSettingSriov *setting) +{ + g_return_val_if_fail(NM_IS_SETTING_SRIOV(setting), NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE); + + return setting->eswitch_encap_mode; +} + static int vf_index_compare(gconstpointer a, gconstpointer b) { @@ -1376,6 +1412,56 @@ nm_setting_sriov_class_init(NMSettingSriovClass *klass) NMSettingSriov, eswitch_mode); + /** + * NMSettingSriov:eswitch-inline-mode + * + * Select the eswitch inline-mode of the device. Some HWs need the VF driver to put + * part of the packet headers on the TX descriptor so the e-switch can do proper + * matching and steering. + * + * Currently it's only supported for PCI PF devices, and only if the eswitch device + * is managed from the same PCI address than the PF. + * + * If set to %NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE (default) the eswitch inline-mode + * won't be modified by NetworkManager. + * + * Since: 1.46 + */ + _nm_setting_property_define_direct_enum(properties_override, + obj_properties, + NM_SETTING_SRIOV_ESWITCH_INLINE_MODE, + PROP_ESWITCH_INLINE_MODE, + NM_TYPE_SRIOV_ESWITCH_INLINE_MODE, + NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE, + NM_SETTING_PARAM_FUZZY_IGNORE, + NULL, + NMSettingSriov, + eswitch_inline_mode); + + /** + * NMSettingSriov:eswitch-encap-mode + * + * Select the eswitch encapsulation support. + * + * Currently it's only supported for PCI PF devices, and only if the eswitch device + * is managed from the same PCI address than the PF. + * + * If set to %NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE (default) the eswitch encap-mode + * won't be modified by NetworkManager. + * + * Since: 1.46 + */ + _nm_setting_property_define_direct_enum(properties_override, + obj_properties, + NM_SETTING_SRIOV_ESWITCH_ENCAP_MODE, + PROP_ESWITCH_ENCAP_MODE, + NM_TYPE_SRIOV_ESWITCH_ENCAP_MODE, + NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE, + NM_SETTING_PARAM_FUZZY_IGNORE, + NULL, + NMSettingSriov, + eswitch_encap_mode); + g_object_class_install_properties(object_class, _PROPERTY_ENUMS_LAST, obj_properties); _nm_setting_class_commit(setting_class, diff --git a/src/libnm-core-public/nm-setting-sriov.h b/src/libnm-core-public/nm-setting-sriov.h index 16e493258f..e210ccbdc4 100644 --- a/src/libnm-core-public/nm-setting-sriov.h +++ b/src/libnm-core-public/nm-setting-sriov.h @@ -26,10 +26,12 @@ G_BEGIN_DECLS #define NM_SETTING_SRIOV_SETTING_NAME "sriov" -#define NM_SETTING_SRIOV_TOTAL_VFS "total-vfs" -#define NM_SETTING_SRIOV_VFS "vfs" -#define NM_SETTING_SRIOV_AUTOPROBE_DRIVERS "autoprobe-drivers" -#define NM_SETTING_SRIOV_ESWITCH_MODE "eswitch-mode" +#define NM_SETTING_SRIOV_TOTAL_VFS "total-vfs" +#define NM_SETTING_SRIOV_VFS "vfs" +#define NM_SETTING_SRIOV_AUTOPROBE_DRIVERS "autoprobe-drivers" +#define NM_SETTING_SRIOV_ESWITCH_MODE "eswitch-mode" +#define NM_SETTING_SRIOV_ESWITCH_INLINE_MODE "eswitch-inline-mode" +#define NM_SETTING_SRIOV_ESWITCH_ENCAP_MODE "eswitch-encap-mode" #define NM_SRIOV_VF_ATTRIBUTE_MAC "mac" #define NM_SRIOV_VF_ATTRIBUTE_SPOOF_CHECK "spoof-check" @@ -68,6 +70,38 @@ typedef enum { NM_SRIOV_ESWITCH_MODE_SWITCHDEV = 1, } NMSriovEswitchMode; +/** + * NMSriovEswitchInlineMode: + * @NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE: don't modify current inline-mode + * @NM_SRIOV_ESWITCH_INLINE_MODE_NONE: don't use inline mode + * @NM_SRIOV_ESWITCH_INLINE_MODE_LINK: L2 mode + * @NM_SRIOV_ESWITCH_INLINE_MODE_NETWORK: L3 mode + * @NM_SRIOV_ESWITCH_INLINE_MODE_TRANSPORT: L4 mode + * + * Since: 1.46 + */ +typedef enum { + NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE = -1, + NM_SRIOV_ESWITCH_INLINE_MODE_NONE = 0, + NM_SRIOV_ESWITCH_INLINE_MODE_LINK = 1, + NM_SRIOV_ESWITCH_INLINE_MODE_NETWORK = 2, + NM_SRIOV_ESWITCH_INLINE_MODE_TRANSPORT = 3, +} NMSriovEswitchInlineMode; + +/** + * NMSriovEswitchEncapMode: + * @NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE: don't modify current encap-mode + * @NM_SRIOV_ESWITCH_ENCAP_MODE_NONE: disable encapsulation mode + * @NM_SRIOV_ESWITCH_ENCAP_MODE_BASIC: enable encapsulation mode + * + * Since: 1.46 + */ +typedef enum { + NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE = -1, + NM_SRIOV_ESWITCH_ENCAP_MODE_NONE = 0, + NM_SRIOV_ESWITCH_ENCAP_MODE_BASIC = 1, +} NMSriovEswitchEncapMode; + NM_AVAILABLE_IN_1_14 GType nm_setting_sriov_get_type(void); NM_AVAILABLE_IN_1_14 @@ -90,6 +124,10 @@ NM_AVAILABLE_IN_1_14 NMTernary nm_setting_sriov_get_autoprobe_drivers(NMSettingSriov *setting); NM_AVAILABLE_IN_1_46 NMSriovEswitchMode nm_setting_sriov_get_eswitch_mode(NMSettingSriov *setting); +NM_AVAILABLE_IN_1_46 +NMSriovEswitchInlineMode nm_setting_sriov_get_eswitch_inline_mode(NMSettingSriov *setting); +NM_AVAILABLE_IN_1_46 +NMSriovEswitchEncapMode nm_setting_sriov_get_eswitch_encap_mode(NMSettingSriov *setting); NM_AVAILABLE_IN_1_14 gboolean nm_sriov_vf_add_vlan(NMSriovVF *vf, guint vlan_id); diff --git a/src/libnmc-setting/nm-meta-setting-desc.c b/src/libnmc-setting/nm-meta-setting-desc.c index 8a53af36e2..4876acc2e9 100644 --- a/src/libnmc-setting/nm-meta-setting-desc.c +++ b/src/libnmc-setting/nm-meta-setting-desc.c @@ -7366,6 +7366,12 @@ static const NMMetaPropertyInfo *const property_infos_SRIOV[] = { PROPERTY_INFO_WITH_DESC (NM_SETTING_SRIOV_ESWITCH_MODE, .property_type = &_pt_gobject_enum, ), + PROPERTY_INFO_WITH_DESC (NM_SETTING_SRIOV_ESWITCH_INLINE_MODE, + .property_type = &_pt_gobject_enum, + ), + PROPERTY_INFO_WITH_DESC (NM_SETTING_SRIOV_ESWITCH_ENCAP_MODE, + .property_type = &_pt_gobject_enum, + ), NULL }; diff --git a/src/libnmc-setting/settings-docs.h.in b/src/libnmc-setting/settings-docs.h.in index 7e88de3a71..0ca03e5230 100644 --- a/src/libnmc-setting/settings-docs.h.in +++ b/src/libnmc-setting/settings-docs.h.in @@ -310,6 +310,8 @@ #define DESCRIBE_DOC_NM_SETTING_SERIAL_SEND_DELAY N_("Time to delay between each byte sent to the modem, in microseconds.") #define DESCRIBE_DOC_NM_SETTING_SERIAL_STOPBITS N_("Number of stop bits for communication on the serial port. Either 1 or 2. The 1 in \"8n1\" for example.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_AUTOPROBE_DRIVERS N_("Whether to autoprobe virtual functions by a compatible driver. If set to \"true\" (1), the kernel will try to bind VFs to a compatible driver and if this succeeds a new network interface will be instantiated for each VF. If set to \"false\" (0), VFs will not be claimed and no network interfaces will be created for them. When set to \"default\" (-1), the global default is used; in case the global default is unspecified it is assumed to be \"true\" (1).") +#define DESCRIBE_DOC_NM_SETTING_SRIOV_ESWITCH_ENCAP_MODE N_("Select the eswitch encapsulation support. Currently it's only supported for PCI PF devices, and only if the eswitch device is managed from the same PCI address than the PF. If set to \"preserve\" (-1) (default) the eswitch encap-mode won't be modified by NetworkManager.") +#define DESCRIBE_DOC_NM_SETTING_SRIOV_ESWITCH_INLINE_MODE N_("Select the eswitch inline-mode of the device. Some HWs need the VF driver to put part of the packet headers on the TX descriptor so the e-switch can do proper matching and steering. Currently it's only supported for PCI PF devices, and only if the eswitch device is managed from the same PCI address than the PF. If set to \"preserve\" (-1) (default) the eswitch inline-mode won't be modified by NetworkManager.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_ESWITCH_MODE N_("Select the eswitch mode of the device. Currently it's only supported for PCI PF devices, and only if the eswitch device is managed from the same PCI address than the PF. If set to \"preserve\" (-1) (default) the eswitch mode won't be modified by NetworkManager.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_TOTAL_VFS N_("The total number of virtual functions to create. Note that when the sriov setting is present NetworkManager enforces the number of virtual functions on the interface (also when it is zero) during activation and resets it upon deactivation. To prevent any changes to SR-IOV parameters don't add a sriov setting to the connection.") #define DESCRIBE_DOC_NM_SETTING_SRIOV_VFS N_("Array of virtual function descriptors. Each VF descriptor is a dictionary mapping attribute names to GVariant values. The 'index' entry is mandatory for each VF. When represented as string a VF is in the form: \"INDEX [ATTR=VALUE[ ATTR=VALUE]...]\". for example: \"2 mac=00:11:22:33:44:55 spoof-check=true\". Multiple VFs can be specified using a comma as separator. Currently, the following attributes are supported: mac, spoof-check, trust, min-tx-rate, max-tx-rate, vlans. The \"vlans\" attribute is represented as a semicolon-separated list of VLAN descriptors, where each descriptor has the form \"ID[.PRIORITY[.PROTO]]\". PROTO can be either 'q' for 802.1Q (the default) or 'ad' for 802.1ad.") diff --git a/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in b/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in index 177b1a5e78..fa43bdcf70 100644 --- a/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in +++ b/src/nmcli/gen-metadata-nm-settings-nmcli.xml.in @@ -1824,6 +1824,14 @@ nmcli-description="Select the eswitch mode of the device. Currently it's only supported for PCI PF devices, and only if the eswitch device is managed from the same PCI address than the PF. If set to "preserve" (-1) (default) the eswitch mode won't be modified by NetworkManager." format="choice (NMSriovEswitchMode)" values="preserve (-1), legacy (0), switchdev (1)" /> + + Date: Wed, 14 Feb 2024 10:09:56 +0100 Subject: [PATCH 6/9] devlink: get and set eswitch inline-mode and encap-mode The setter function allow to set to "preserve" to modify only some of them. --- src/libnm-base/nm-base.h | 21 +++- src/libnm-platform/devlink/nm-devlink.c | 145 ++++++++++++++---------- src/libnm-platform/devlink/nm-devlink.h | 12 +- src/libnm-platform/nm-linux-platform.c | 15 ++- 4 files changed, 123 insertions(+), 70 deletions(-) diff --git a/src/libnm-base/nm-base.h b/src/libnm-base/nm-base.h index c313bccf6e..d77ff9ec1e 100644 --- a/src/libnm-base/nm-base.h +++ b/src/libnm-base/nm-base.h @@ -278,12 +278,31 @@ typedef enum { } _NMVlanFlags; typedef enum { - /* Mirrors libnm's NMSriovEswitchMode */ + /* Mirrors libnm's NMSriovEswitchMode. + * Values >= 0 mirror kernel's enum devlink_eswitch_mode. */ _NM_SRIOV_ESWITCH_MODE_PRESERVE = -1, _NM_SRIOV_ESWITCH_MODE_LEGACY = 0, _NM_SRIOV_ESWITCH_MODE_SWITCHDEV = 1, } _NMSriovEswitchMode; +typedef enum { + /* Mirrors libnm's NMSriovEswitchInlineMode. + * Values >= 0 mirror kernel's enum devlink_eswitch_inline_mode. */ + _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE = -1, + _NM_SRIOV_ESWITCH_INLINE_MODE_NONE = 0, + _NM_SRIOV_ESWITCH_INLINE_MODE_LINK = 1, + _NM_SRIOV_ESWITCH_INLINE_MODE_NETWORK = 2, + _NM_SRIOV_ESWITCH_INLINE_MODE_TRANSPORT = 3, +} _NMSriovEswitchInlineMode; + +typedef enum { + /* Mirrors libnm's NMSriovEswitchEncapMode. + * Values >= 0 mirror kernel's enum devlink_eswitch_encap_mode. */ + _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE = -1, + _NM_SRIOV_ESWITCH_ENCAP_MODE_NONE = 0, + _NM_SRIOV_ESWITCH_ENCAP_MODE_BASIC = 1, +} _NMSriovEswitchEncapMode; + /*****************************************************************************/ typedef enum { diff --git a/src/libnm-platform/devlink/nm-devlink.c b/src/libnm-platform/devlink/nm-devlink.c index dc68bafde5..a2fee3bcf3 100644 --- a/src/libnm-platform/devlink/nm-devlink.c +++ b/src/libnm-platform/devlink/nm-devlink.c @@ -58,7 +58,7 @@ struct _NMDevlink { NMDevlink * nm_devlink_new(NMPlatform *platform, struct nl_sock *genl_sock_sync, int ifindex) { - NMDevlink *self = g_new(NMDevlink, 1); + NMDevlink *self = g_new(NMDevlink, 1); self->plat = platform; self->genl_sock_sync = genl_sock_sync; @@ -230,81 +230,41 @@ static int devlink_parse_eswitch_mode(const struct nl_msg *msg, void *data) { static const struct nla_policy eswitch_policy[] = { - [DEVLINK_ATTR_ESWITCH_MODE] = {.type = NLA_U16}, + [DEVLINK_ATTR_ESWITCH_MODE] = {.type = NLA_U16}, + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = {.type = NLA_U8}, + [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = {.type = NLA_U8}, }; - enum devlink_eswitch_mode *eswitch_mode = data; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - struct nlattr *tb[G_N_ELEMENTS(eswitch_policy)]; + NMDevlinkEswitchParams *params = data; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *tb[G_N_ELEMENTS(eswitch_policy)]; if (nla_parse_arr(tb, genlmsg_attrdata(gnlh, 0), genlmsg_attrlen(gnlh, 0), eswitch_policy) < 0) return NL_SKIP; - if (!tb[DEVLINK_ATTR_ESWITCH_MODE]) + if (!tb[DEVLINK_ATTR_ESWITCH_MODE] || !tb[DEVLINK_ATTR_ESWITCH_INLINE_MODE] + || !tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) return NL_SKIP; - *eswitch_mode = nla_get_u16(tb[DEVLINK_ATTR_ESWITCH_MODE]); + params->mode = (_NMSriovEswitchMode) nla_get_u16(tb[DEVLINK_ATTR_ESWITCH_MODE]); + params->encap_mode = (_NMSriovEswitchEncapMode) nla_get_u8(tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); + params->inline_mode = + (_NMSriovEswitchInlineMode) nla_get_u8(tb[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); return NL_OK; } /* - * nm_devlink_get_eswitch_mode: + * nm_devlink_get_eswitch_params: * @self: the #NMDevlink + * @out_params: the eswitch parameters read via Devlink * @error: the error location * - * Get the eswitch mode of the device related to the #NMDevlink instance. Note - * that this might be unsupported by the device (see nm_devlink_get_dev()). - * - * Returns: the eswitch mode of the device, or <0 in case of error - */ -int -nm_devlink_get_eswitch_mode(NMDevlink *self, GError **error) -{ - nm_auto_nlmsg struct nl_msg *msg = NULL; - gs_free char *bus = NULL; - gs_free char *addr = NULL; - enum devlink_eswitch_mode eswitch_mode; - gs_free char *err_msg = NULL; - int rc; - - if (!nm_devlink_get_dev_identifier(self, &bus, &addr, error)) - return -1; - - msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_GET, 0); - NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, bus); - NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, addr); - - rc = devlink_send_and_recv(self, msg, devlink_parse_eswitch_mode, &eswitch_mode, &err_msg); - if (rc < 0) { - g_set_error(error, - NM_UTILS_ERROR, - NM_UTILS_ERROR_UNKNOWN, - "devlink: get-eswitch-mode: failed (%d) %s", - rc, - err_msg); - return rc; - } - - _LOGD("get-eswitch-mode: success"); - - return (int) eswitch_mode; - -nla_put_failure: - g_return_val_if_reached(-1); -} - -/* - * nm_devlink_set_eswitch_mode: - * @self: the #NMDevlink - * @mode: the eswitch mode to set - * @error: the error location - * - * Set the eswitch mode of the device related to the #NMDevlink instance. Note + * Get the eswitch configuration of the device related to the #NMDevlink instance. Note * that this might be unsupported by the device (see nm_devlink_get_dev()). * * Returns: FALSE in case of error, TRUE otherwise */ gboolean -nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GError **error) +nm_devlink_get_eswitch_params(NMDevlink *self, NMDevlinkEswitchParams *out_params, GError **error) { nm_auto_nlmsg struct nl_msg *msg = NULL; gs_free char *bus = NULL; @@ -312,26 +272,87 @@ nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GEr gs_free char *err_msg = NULL; int rc; + nm_assert(out_params); + if (!nm_devlink_get_dev_identifier(self, &bus, &addr, error)) return FALSE; - msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_SET, 0); + msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_GET, 0); NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, bus); NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, addr); - NLA_PUT_U16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); - rc = devlink_send_and_recv(self, msg, NULL, NULL, &err_msg); + rc = devlink_send_and_recv(self, msg, devlink_parse_eswitch_mode, out_params, &err_msg); if (rc < 0) { g_set_error(error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, - "devlink: set-eswitch-mode: failed (%d) %s", + "devlink: eswitch get: failed (%d) %s", rc, err_msg); return FALSE; } - _LOGD("set-eswitch-mode: success"); + _LOGD("eswitch get: success"); + + return TRUE; + +nla_put_failure: + g_return_val_if_reached(FALSE); +} + +/* + * nm_devlink_set_eswitch_params: + * @self: the #NMDevlink + * @params: the eswitch parameters to set + * @error: the error location + * + * Set the eswitch configuration of the device related to the #NMDevlink instance. Note + * that this might be unsupported by the device (see nm_devlink_get_dev()). + * + * If any of the eswitch parameters is set to "preserve" it won't be modified. + * + * Returns: FALSE in case of error, TRUE otherwise + */ +gboolean +nm_devlink_set_eswitch_params(NMDevlink *self, NMDevlinkEswitchParams params, GError **error) +{ + nm_auto_nlmsg struct nl_msg *msg = NULL; + gs_free char *bus = NULL; + gs_free char *addr = NULL; + gs_free char *err_msg = NULL; + int rc; + + if (params.mode == _NM_SRIOV_ESWITCH_MODE_PRESERVE + && params.inline_mode == _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE + && params.encap_mode == _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE) + return TRUE; + + if (!nm_devlink_get_dev_identifier(self, &bus, &addr, error)) + return FALSE; + + msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_SET, 0); + NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, bus); + NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, addr); + + if (params.mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) + NLA_PUT_U16(msg, DEVLINK_ATTR_ESWITCH_MODE, params.mode); + if (params.inline_mode != _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE) + NLA_PUT_U8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE, params.inline_mode); + if (params.encap_mode != _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE) + NLA_PUT_U8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, params.encap_mode); + + rc = devlink_send_and_recv(self, msg, NULL, NULL, &err_msg); + if (rc < 0) { + g_set_error(error, + NM_UTILS_ERROR, + NM_UTILS_ERROR_UNKNOWN, + "devlink: eswitch set: failed (%d) %s", + rc, + err_msg); + return FALSE; + } + + _LOGD("eswitch set: success"); return TRUE; diff --git a/src/libnm-platform/devlink/nm-devlink.h b/src/libnm-platform/devlink/nm-devlink.h index 789b13d3ab..c626a1206d 100644 --- a/src/libnm-platform/devlink/nm-devlink.h +++ b/src/libnm-platform/devlink/nm-devlink.h @@ -6,17 +6,25 @@ #ifndef __NMP_DEVLINK_H__ #define __NMP_DEVLINK_H__ +#include "libnm-base/nm-base.h" #include struct nl_sock; typedef struct _NMPlatform NMPlatform; typedef struct _NMDevlink NMDevlink; +typedef struct { + _NMSriovEswitchMode mode; + _NMSriovEswitchInlineMode inline_mode; + _NMSriovEswitchEncapMode encap_mode; +} NMDevlinkEswitchParams; + NMDevlink *nm_devlink_new(NMPlatform *platform, struct nl_sock *genl_sock_sync, int ifindex); gboolean nm_devlink_get_dev_identifier(NMDevlink *self, char **out_bus, char **out_addr, GError **error); -int nm_devlink_get_eswitch_mode(NMDevlink *self, GError **error); gboolean -nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GError **error); +nm_devlink_get_eswitch_params(NMDevlink *self, NMDevlinkEswitchParams *out_params, GError **error); +gboolean +nm_devlink_set_eswitch_params(NMDevlink *self, NMDevlinkEswitchParams params, GError **error); #endif /* __NMP_DEVLINK_H__ */ \ No newline at end of file diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 1e92f60309..765b02e788 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -9076,16 +9076,20 @@ sriov_async_step2_set_eswitch_mode(SriovAsyncState *async_state) { NMPlatform *platform = async_state->platform; NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); - enum devlink_eswitch_mode eswitch_mode = (enum devlink_eswitch_mode) async_state->eswitch_mode; gs_free NMDevlink *devlink = NULL; gs_free_error GError *error = NULL; + NMDevlinkEswitchParams eswitch_params; - _LOGD("setting eswitch-mode to '%d'", (int) eswitch_mode); + _LOGD("setting eswitch-mode to '%d'", (int) async_state->eswitch_mode); + + eswitch_params.mode = (_NMSriovEswitchMode) async_state->eswitch_mode; + eswitch_params.inline_mode = _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE; + eswitch_params.encap_mode = _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE; /* We set eswitch mode as a sriov_async step because it's in the middle of * other steps that are async. However, this step itself is synchronous. */ devlink = nm_devlink_new(platform, priv->sk_genl_sync, async_state->ifindex); - if (!nm_devlink_set_eswitch_mode(devlink, eswitch_mode, &error)) { + if (!nm_devlink_set_eswitch_params(devlink, eswitch_params, &error)) { sriov_async_finish_err(async_state, g_steal_pointer(&error)); return; } @@ -9135,6 +9139,7 @@ link_set_sriov_params_async(NMPlatform *platform, int max_vfs; int current_num_vfs; int current_eswitch_mode = _NM_SRIOV_ESWITCH_MODE_PRESERVE; + NMDevlinkEswitchParams eswitch_params; gboolean need_change_eswitch_mode; gboolean need_change_vfs; gboolean need_destroy_vfs; @@ -9202,11 +9207,11 @@ link_set_sriov_params_async(NMPlatform *platform, if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) { gs_free NMDevlink *devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex); - current_eswitch_mode = nm_devlink_get_eswitch_mode(devlink, &error); - if (current_eswitch_mode < 0) { + if (!nm_devlink_get_eswitch_params(devlink, &eswitch_params, &error)) { sriov_async_finish_err(async_state, g_steal_pointer(&error)); return; } + current_eswitch_mode = eswitch_params.mode; } /* Decide what actions we must do. Note that we might need to destroy the VFs even From 4669f01eb0fb35d32e397c372b14689088cfb59e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Wed, 14 Feb 2024 11:30:03 +0100 Subject: [PATCH 7/9] sriov: set the devlink's eswitch inline-mode and encap-mode Set these parameters according to the values set in the new properties sriov.eswitch-inline-mode and sriov.eswitch-encap-mode. The number of parameters related to SR-IOV was becoming too big. Refactor to group them in a NMPlatformSriovParams struct and pass it around. --- src/core/devices/nm-device.c | 41 +++++--- src/libnm-platform/devlink/nm-devlink.c | 12 +-- src/libnm-platform/nm-linux-platform.c | 119 +++++++++++++++--------- src/libnm-platform/nm-platform.c | 17 ++-- src/libnm-platform/nm-platform.h | 16 ++-- 5 files changed, 125 insertions(+), 80 deletions(-) diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c index 07a763429e..34022efbde 100644 --- a/src/core/devices/nm-device.c +++ b/src/core/devices/nm-device.c @@ -137,9 +137,7 @@ typedef struct { GCancellable *cancellable; NMPlatformAsyncCallback callback; gpointer callback_data; - guint num_vfs; - NMOptionBool autoprobe; - NMSriovEswitchMode eswitch_mode; + NMPlatformSriovParams sriov_params; } SriovOp; typedef enum { @@ -7707,9 +7705,7 @@ sriov_op_start(NMDevice *self, SriovOp *op) nm_platform_link_set_sriov_params_async(nm_device_get_platform(self), priv->ifindex, - op->num_vfs, - op->autoprobe, - (_NMSriovEswitchMode) op->eswitch_mode, + op->sriov_params, sriov_op_cb, op, op->cancellable); @@ -7770,12 +7766,14 @@ sriov_op_queue_op(NMDevice *self, SriovOp *op) } static void -sriov_op_queue(NMDevice *self, - guint num_vfs, - NMOptionBool autoprobe, - NMSriovEswitchMode eswitch_mode, - NMPlatformAsyncCallback callback, - gpointer callback_data) +sriov_op_queue(NMDevice *self, + guint num_vfs, + NMOptionBool autoprobe, + NMSriovEswitchMode eswitch_mode, + NMSriovEswitchInlineMode eswitch_inline_mode, + NMSriovEswitchEncapMode eswitch_encap_mode, + NMPlatformAsyncCallback callback, + gpointer callback_data) { SriovOp *op; @@ -7800,9 +7798,14 @@ sriov_op_queue(NMDevice *self, op = g_slice_new(SriovOp); *op = (SriovOp){ - .num_vfs = num_vfs, - .autoprobe = autoprobe, - .eswitch_mode = eswitch_mode, + .sriov_params = + (NMPlatformSriovParams){ + .num_vfs = num_vfs, + .autoprobe = autoprobe, + .eswitch_mode = (_NMSriovEswitchMode) eswitch_mode, + .eswitch_inline_mode = (_NMSriovEswitchInlineMode) eswitch_inline_mode, + .eswitch_encap_mode = (_NMSriovEswitchEncapMode) eswitch_encap_mode, + }, .callback = callback, .callback_data = callback_data, }; @@ -7831,6 +7834,8 @@ device_init_static_sriov_num_vfs(NMDevice *self) num_vfs, NM_OPTION_BOOL_DEFAULT, NM_SRIOV_ESWITCH_MODE_PRESERVE, + NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE, + NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE, NULL, NULL); } @@ -10014,6 +10019,8 @@ activate_stage1_device_prepare(NMDevice *self) nm_setting_sriov_get_total_vfs(s_sriov), NM_TERNARY_TO_OPTION_BOOL(autoprobe), nm_setting_sriov_get_eswitch_mode(s_sriov), + nm_setting_sriov_get_eswitch_inline_mode(s_sriov), + nm_setting_sriov_get_eswitch_encap_mode(s_sriov), sriov_params_cb, nm_utils_user_data_pack(self, g_steal_pointer(&plat_vfs))); priv->stage1_sriov_state = NM_DEVICE_STAGE_STATE_PENDING; @@ -16731,6 +16738,8 @@ _set_state_full(NMDevice *self, NMDeviceState state, NMDeviceStateReason reason, 0, NM_OPTION_BOOL_TRUE, NM_SRIOV_ESWITCH_MODE_PRESERVE, + NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE, + NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE, sriov_reset_on_deactivate_cb, nm_utils_user_data_pack(self, GINT_TO_POINTER(reason))); } @@ -16784,6 +16793,8 @@ _set_state_full(NMDevice *self, NMDeviceState state, NMDeviceStateReason reason, 0, NM_OPTION_BOOL_TRUE, NM_SRIOV_ESWITCH_MODE_PRESERVE, + NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE, + NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE, sriov_reset_on_failure_cb, self); break; diff --git a/src/libnm-platform/devlink/nm-devlink.c b/src/libnm-platform/devlink/nm-devlink.c index a2fee3bcf3..dbd8563850 100644 --- a/src/libnm-platform/devlink/nm-devlink.c +++ b/src/libnm-platform/devlink/nm-devlink.c @@ -171,7 +171,7 @@ err_cb_handler(const struct sockaddr_nl *nla, const struct nlmsgerr *err, void * *result = err->error; nlmsg_parse_error(nlmsg_undata(err), &extack_msg); - _LOGT("error response (%d) %s", err->error, extack_msg ?: nm_strerror(err->error)); + _LOGT("error response (%d - %s)", err->error, extack_msg ?: nm_strerror(err->error)); if (err_msg) *err_msg = g_strdup(extack_msg ?: nm_strerror(err->error)); @@ -212,7 +212,7 @@ devlink_send_and_recv(NMDevlink *self, while (cb_result == CB_RESULT_PENDING) { nle = nl_recvmsgs(self->genl_sock_sync, &cb); if (nle < 0 && nle != -EAGAIN) { - _LOGW("nl_recvmsgs() error: (%d) %s", nle, nm_strerror(nle)); + _LOGW("nl_recvmsgs() error (%d - %s)", nle, nm_strerror(nle)); break; } } @@ -286,13 +286,13 @@ nm_devlink_get_eswitch_params(NMDevlink *self, NMDevlinkEswitchParams *out_param g_set_error(error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, - "devlink: eswitch get: failed (%d) %s", + "devlink: eswitch get failed (%d - %s)", rc, err_msg); return FALSE; } - _LOGD("eswitch get: success"); + _LOGD("eswitch get success"); return TRUE; @@ -346,13 +346,13 @@ nm_devlink_set_eswitch_params(NMDevlink *self, NMDevlinkEswitchParams params, GE g_set_error(error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, - "devlink: eswitch set: failed (%d) %s", + "devlink: eswitch set failed (%d - %s)", rc, err_msg); return FALSE; } - _LOGD("eswitch set: success"); + _LOGD("eswitch set success"); return TRUE; diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 765b02e788..7d34fa939d 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -8955,10 +8955,9 @@ sriov_set_autoprobe(NMPlatform *platform, #define _SRIOV_ASYNC_MAX_STEPS 4 typedef struct _SriovAsyncState { - NMPlatform *platform; - int ifindex; - guint num_vfs; - _NMSriovEswitchMode eswitch_mode; + NMPlatform *platform; + int ifindex; + NMPlatformSriovParams sriov_params; void (*steps[_SRIOV_ASYNC_MAX_STEPS])(struct _SriovAsyncState *); int current_step; NMPlatformAsyncCallback callback; @@ -9074,17 +9073,20 @@ sriov_async_step1_destroy_vfs(SriovAsyncState *async_state) static void sriov_async_step2_set_eswitch_mode(SriovAsyncState *async_state) { - NMPlatform *platform = async_state->platform; - NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); - gs_free NMDevlink *devlink = NULL; - gs_free_error GError *error = NULL; - NMDevlinkEswitchParams eswitch_params; + NMPlatform *platform = async_state->platform; + NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); + gs_free NMDevlink *devlink = NULL; + gs_free_error GError *error = NULL; + NMDevlinkEswitchParams eswitch_params = { + .mode = async_state->sriov_params.eswitch_mode, + .inline_mode = async_state->sriov_params.eswitch_inline_mode, + .encap_mode = async_state->sriov_params.eswitch_encap_mode, + }; - _LOGD("setting eswitch-mode to '%d'", (int) async_state->eswitch_mode); - - eswitch_params.mode = (_NMSriovEswitchMode) async_state->eswitch_mode; - eswitch_params.inline_mode = _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE; - eswitch_params.encap_mode = _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE; + _LOGD("setting eswitch params (mode=%d, inline-mode=%d, encap-mode=%d)", + (int) eswitch_params.mode, + (int) eswitch_params.inline_mode, + (int) eswitch_params.encap_mode); /* We set eswitch mode as a sriov_async step because it's in the middle of * other steps that are async. However, this step itself is synchronous. */ @@ -9101,9 +9103,9 @@ static void sriov_async_step3_create_vfs(SriovAsyncState *async_state) { NMPlatform *platform = async_state->platform; - const char *val = nm_sprintf_bufa(32, "%u", async_state->num_vfs); + const char *val = nm_sprintf_bufa(32, "%u", async_state->sriov_params.num_vfs); - _LOGD("setting sriov_numvfs to %u", async_state->num_vfs); + _LOGD("setting sriov_numvfs to %u", async_state->sriov_params.num_vfs); sriov_async_set_num_vfs(async_state, val); } @@ -9114,6 +9116,41 @@ sriov_async_step_finish_ok(SriovAsyncState *async_state) sriov_async_finish_err(async_state, NULL); } +static int +sriov_eswitch_get_needs_change(SriovAsyncState *async_state, + gboolean *out_needs_change, + GError **error) +{ + NMPlatform *platform = async_state->platform; + NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); + _NMSriovEswitchMode mode = async_state->sriov_params.eswitch_mode; + _NMSriovEswitchInlineMode inline_mode = async_state->sriov_params.eswitch_inline_mode; + _NMSriovEswitchEncapMode encap_mode = async_state->sriov_params.eswitch_encap_mode; + NMDevlinkEswitchParams current_params; + gs_free NMDevlink *devlink = NULL; + + nm_assert(out_needs_change); + + if (mode == _NM_SRIOV_ESWITCH_MODE_PRESERVE + && inline_mode == _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE + && encap_mode == _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE) { + *out_needs_change = FALSE; + return 0; + } + + devlink = nm_devlink_new(platform, priv->sk_genl_sync, async_state->ifindex); + + if (!nm_devlink_get_eswitch_params(devlink, ¤t_params, error)) + return -1; + + *out_needs_change = (mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && mode != current_params.mode) + || (inline_mode != _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE + && inline_mode != current_params.inline_mode) + || (encap_mode != _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE + && encap_mode != current_params.encap_mode); + return 0; +} + /* * Take special care when setting new values: * - don't touch anything if the right values are already set @@ -9123,24 +9160,19 @@ sriov_async_step_finish_ok(SriovAsyncState *async_state) static void link_set_sriov_params_async(NMPlatform *platform, int ifindex, - guint num_vfs, - NMOptionBool autoprobe, - _NMSriovEswitchMode eswitch_mode, + NMPlatformSriovParams sriov_params, NMPlatformAsyncCallback callback, gpointer data, GCancellable *cancellable) { SriovAsyncState *async_state; - NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform); nm_auto_pop_netns NMPNetns *netns = NULL; gs_free_error GError *error = NULL; nm_auto_close int dirfd = -1; char ifname[IFNAMSIZ]; int max_vfs; int current_num_vfs; - int current_eswitch_mode = _NM_SRIOV_ESWITCH_MODE_PRESERVE; - NMDevlinkEswitchParams eswitch_params; - gboolean need_change_eswitch_mode; + gboolean need_change_eswitch_params; gboolean need_change_vfs; gboolean need_destroy_vfs; gboolean need_create_vfs; @@ -9152,7 +9184,7 @@ link_set_sriov_params_async(NMPlatform *platform, async_state = g_new0(SriovAsyncState, 1); async_state->platform = g_object_ref(platform); async_state->ifindex = ifindex; - async_state->eswitch_mode = eswitch_mode; + async_state->sriov_params = sriov_params; async_state->current_step = -1; async_state->callback = callback; async_state->data = data; @@ -9190,30 +9222,23 @@ link_set_sriov_params_async(NMPlatform *platform, return; } - if (num_vfs > max_vfs) { - _LOGW("link: device %d only supports %u VFs (requested %u)", ifindex, max_vfs, num_vfs); + if (sriov_params.num_vfs > max_vfs) { + _LOGW("link: device %d only supports %u VFs (requested %u)", + ifindex, + max_vfs, + sriov_params.num_vfs); _LOGW("link: reducing num_vfs to %u for device %d", max_vfs, ifindex); - num_vfs = max_vfs; + sriov_params.num_vfs = max_vfs; + async_state->sriov_params.num_vfs = max_vfs; } - async_state->num_vfs = num_vfs; - /* Setting autoprobe goes first, we can do it synchronously */ - if (num_vfs > 0 && !sriov_set_autoprobe(platform, dirfd, ifname, autoprobe, &error)) { + if (sriov_params.num_vfs > 0 + && !sriov_set_autoprobe(platform, dirfd, ifname, sriov_params.autoprobe, &error)) { sriov_async_finish_err(async_state, g_steal_pointer(&error)); return; } - if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) { - gs_free NMDevlink *devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex); - - if (!nm_devlink_get_eswitch_params(devlink, &eswitch_params, &error)) { - sriov_async_finish_err(async_state, g_steal_pointer(&error)); - return; - } - current_eswitch_mode = eswitch_params.mode; - } - /* Decide what actions we must do. Note that we might need to destroy the VFs even * if num_vfs == current_num_vfs, for example to change the eswitch mode. Because of * that, we might need to create VFs even if num_vfs == current_num_vfs. @@ -9223,16 +9248,18 @@ link_set_sriov_params_async(NMPlatform *platform, * 3. Create VFs * 4. Invoke caller's callback */ - need_change_eswitch_mode = - eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && current_eswitch_mode != eswitch_mode; - need_change_vfs = num_vfs != current_num_vfs; - need_destroy_vfs = current_num_vfs > 0 && (need_change_eswitch_mode || need_change_vfs); - need_create_vfs = (current_num_vfs == 0 || need_destroy_vfs) && num_vfs > 0; + if (sriov_eswitch_get_needs_change(async_state, &need_change_eswitch_params, &error) < 0) { + sriov_async_finish_err(async_state, g_steal_pointer(&error)); + return; + } + need_change_vfs = sriov_params.num_vfs != current_num_vfs; + need_destroy_vfs = current_num_vfs > 0 && (need_change_eswitch_params || need_change_vfs); + need_create_vfs = (current_num_vfs == 0 || need_destroy_vfs) && sriov_params.num_vfs > 0; i = 0; if (need_destroy_vfs) async_state->steps[i++] = sriov_async_step1_destroy_vfs; - if (need_change_eswitch_mode) + if (need_change_eswitch_params) async_state->steps[i++] = sriov_async_step2_set_eswitch_mode; if (need_create_vfs) async_state->steps[i++] = sriov_async_step3_create_vfs; diff --git a/src/libnm-platform/nm-platform.c b/src/libnm-platform/nm-platform.c index 7fb2a2e2e7..b89b035925 100644 --- a/src/libnm-platform/nm-platform.c +++ b/src/libnm-platform/nm-platform.c @@ -2022,9 +2022,7 @@ nm_platform_link_supports_sriov(NMPlatform *self, int ifindex) void nm_platform_link_set_sriov_params_async(NMPlatform *self, int ifindex, - guint num_vfs, - NMOptionBool autoprobe, - _NMSriovEswitchMode eswitch_mode, + NMPlatformSriovParams sriov_params, NMPlatformAsyncCallback callback, gpointer callback_data, GCancellable *cancellable) @@ -2033,12 +2031,17 @@ nm_platform_link_set_sriov_params_async(NMPlatform *self, g_return_if_fail(ifindex > 0); - _LOG3D("link: setting %u total VFs and autoprobe %d", num_vfs, (int) autoprobe); + _LOG3D("link: setting SR-IOV params (numvfs=%u, autoprobe=%d, eswitch mode=%d inline-mode=%d " + "encap-mode=%d)", + sriov_params.num_vfs, + (int) sriov_params.autoprobe, + (int) sriov_params.eswitch_mode, + (int) sriov_params.eswitch_inline_mode, + (int) sriov_params.eswitch_encap_mode); + klass->link_set_sriov_params_async(self, ifindex, - num_vfs, - autoprobe, - eswitch_mode, + sriov_params, callback, callback_data, cancellable); diff --git a/src/libnm-platform/nm-platform.h b/src/libnm-platform/nm-platform.h index 8aba0e5c11..f6a6ba082b 100644 --- a/src/libnm-platform/nm-platform.h +++ b/src/libnm-platform/nm-platform.h @@ -993,6 +993,14 @@ typedef struct { guint8 public_key[NMP_WIREGUARD_PUBLIC_KEY_LEN]; } _nm_alignas(NMPlatformObject) NMPlatformLnkWireGuard; +typedef struct { + guint num_vfs; + NMOptionBool autoprobe; + _NMSriovEswitchMode eswitch_mode; + _NMSriovEswitchInlineMode eswitch_inline_mode; + _NMSriovEswitchEncapMode eswitch_encap_mode; +} NMPlatformSriovParams; + typedef enum { NM_PLATFORM_WIREGUARD_CHANGE_FLAG_NONE = 0, NM_PLATFORM_WIREGUARD_CHANGE_FLAG_REPLACE_PEERS = (1LL << 0), @@ -1172,9 +1180,7 @@ typedef struct { gboolean (*link_set_name)(NMPlatform *self, int ifindex, const char *name); void (*link_set_sriov_params_async)(NMPlatform *self, int ifindex, - guint num_vfs, - NMOptionBool autoprobe, - _NMSriovEswitchMode eswitch_mode, + NMPlatformSriovParams sriov_params, NMPlatformAsyncCallback callback, gpointer callback_data, GCancellable *cancellable); @@ -2036,9 +2042,7 @@ gboolean nm_platform_link_set_name(NMPlatform *self, int ifindex, const char *na void nm_platform_link_set_sriov_params_async(NMPlatform *self, int ifindex, - guint num_vfs, - NMOptionBool autoprobe, - _NMSriovEswitchMode eswitch_mode, + NMPlatformSriovParams sriov_params, NMPlatformAsyncCallback callback, gpointer callback_data, GCancellable *cancellable); From 27eaf34fcf43c4797c65db8de3a0926b2440f8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 15 Feb 2024 16:58:50 +0100 Subject: [PATCH 8/9] sriov: don't fail if sriov_totalvfs sysfs file is missing If sriov_totalvfs file doesn't exist we don't need to consider it a fatal failure. Try to create the required number of VFs as we were doing before. Note: at least netdevsim doesn't have sriov_totalvfs file, I don't know if there are real drivers that neither has it. --- src/libnm-platform/nm-linux-platform.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c index 7d34fa939d..9ecac2d9b3 100644 --- a/src/libnm-platform/nm-linux-platform.c +++ b/src/libnm-platform/nm-linux-platform.c @@ -9218,8 +9218,9 @@ link_set_sriov_params_async(NMPlatform *platform, max_vfs = sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_totalvfs", &error); if (max_vfs < 0) { - sriov_async_finish_err(async_state, g_steal_pointer(&error)); - return; + _LOGD("link: can't read max VFs (%s)", error->message); + g_clear_error(&error); + max_vfs = sriov_params.num_vfs; /* Try to create all */ } if (sriov_params.num_vfs > max_vfs) { From 7346c5b556d239b4e7bb17014fae5e6586f88603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Fri, 16 Feb 2024 13:47:38 +0100 Subject: [PATCH 9/9] sriov: allow reading empty eswitch paramaters via Devlink Probably not all drivers and devices return all parameters. Set them to "unknown" if they are missing and let the caller to decide what to do. In our case, if the sriov setting has a value different to "preserve" it will try to set it (and will probably fail). But if the missing parameter is set to "preserve" in the sriov setting we can continue, just ignoring it. --- src/libnm-base/nm-base.h | 3 +++ src/libnm-core-public/nm-setting-sriov.h | 3 +++ src/libnm-platform/devlink/nm-devlink.c | 16 ++++++++++------ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/libnm-base/nm-base.h b/src/libnm-base/nm-base.h index d77ff9ec1e..e1cc27332e 100644 --- a/src/libnm-base/nm-base.h +++ b/src/libnm-base/nm-base.h @@ -281,6 +281,7 @@ typedef enum { /* Mirrors libnm's NMSriovEswitchMode. * Values >= 0 mirror kernel's enum devlink_eswitch_mode. */ _NM_SRIOV_ESWITCH_MODE_PRESERVE = -1, + _NM_SRIOV_ESWITCH_MODE_UNKNOWN = -1, /*< skip >*/ _NM_SRIOV_ESWITCH_MODE_LEGACY = 0, _NM_SRIOV_ESWITCH_MODE_SWITCHDEV = 1, } _NMSriovEswitchMode; @@ -289,6 +290,7 @@ typedef enum { /* Mirrors libnm's NMSriovEswitchInlineMode. * Values >= 0 mirror kernel's enum devlink_eswitch_inline_mode. */ _NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE = -1, + _NM_SRIOV_ESWITCH_INLINE_MODE_UNKNOWN = -1, /*< skip >*/ _NM_SRIOV_ESWITCH_INLINE_MODE_NONE = 0, _NM_SRIOV_ESWITCH_INLINE_MODE_LINK = 1, _NM_SRIOV_ESWITCH_INLINE_MODE_NETWORK = 2, @@ -299,6 +301,7 @@ typedef enum { /* Mirrors libnm's NMSriovEswitchEncapMode. * Values >= 0 mirror kernel's enum devlink_eswitch_encap_mode. */ _NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE = -1, + _NM_SRIOV_ESWITCH_ENCAP_MODE_UNKNOWN = -1, /*< skip >*/ _NM_SRIOV_ESWITCH_ENCAP_MODE_NONE = 0, _NM_SRIOV_ESWITCH_ENCAP_MODE_BASIC = 1, } _NMSriovEswitchEncapMode; diff --git a/src/libnm-core-public/nm-setting-sriov.h b/src/libnm-core-public/nm-setting-sriov.h index e210ccbdc4..affccc4892 100644 --- a/src/libnm-core-public/nm-setting-sriov.h +++ b/src/libnm-core-public/nm-setting-sriov.h @@ -66,6 +66,7 @@ typedef enum { */ typedef enum { NM_SRIOV_ESWITCH_MODE_PRESERVE = -1, + NM_SRIOV_ESWITCH_MODE_UNKNOWN = -1, /*< skip >*/ NM_SRIOV_ESWITCH_MODE_LEGACY = 0, NM_SRIOV_ESWITCH_MODE_SWITCHDEV = 1, } NMSriovEswitchMode; @@ -82,6 +83,7 @@ typedef enum { */ typedef enum { NM_SRIOV_ESWITCH_INLINE_MODE_PRESERVE = -1, + NM_SRIOV_ESWITCH_INLINE_MODE_UNKNOWN = -1, /*< skip >*/ NM_SRIOV_ESWITCH_INLINE_MODE_NONE = 0, NM_SRIOV_ESWITCH_INLINE_MODE_LINK = 1, NM_SRIOV_ESWITCH_INLINE_MODE_NETWORK = 2, @@ -98,6 +100,7 @@ typedef enum { */ typedef enum { NM_SRIOV_ESWITCH_ENCAP_MODE_PRESERVE = -1, + NM_SRIOV_ESWITCH_ENCAP_MODE_UNKNOWN = -1, /*< skip >*/ NM_SRIOV_ESWITCH_ENCAP_MODE_NONE = 0, NM_SRIOV_ESWITCH_ENCAP_MODE_BASIC = 1, } NMSriovEswitchEncapMode; diff --git a/src/libnm-platform/devlink/nm-devlink.c b/src/libnm-platform/devlink/nm-devlink.c index dbd8563850..f06697cf59 100644 --- a/src/libnm-platform/devlink/nm-devlink.c +++ b/src/libnm-platform/devlink/nm-devlink.c @@ -237,18 +237,22 @@ devlink_parse_eswitch_mode(const struct nl_msg *msg, void *data) NMDevlinkEswitchParams *params = data; struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *tb[G_N_ELEMENTS(eswitch_policy)]; + struct nlattr *nla; if (nla_parse_arr(tb, genlmsg_attrdata(gnlh, 0), genlmsg_attrlen(gnlh, 0), eswitch_policy) < 0) return NL_SKIP; - if (!tb[DEVLINK_ATTR_ESWITCH_MODE] || !tb[DEVLINK_ATTR_ESWITCH_INLINE_MODE] - || !tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) - return NL_SKIP; + nla = tb[DEVLINK_ATTR_ESWITCH_MODE]; + params->mode = nla ? (_NMSriovEswitchMode) nla_get_u16(nla) : _NM_SRIOV_ESWITCH_MODE_UNKNOWN; - params->mode = (_NMSriovEswitchMode) nla_get_u16(tb[DEVLINK_ATTR_ESWITCH_MODE]); - params->encap_mode = (_NMSriovEswitchEncapMode) nla_get_u8(tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); + nla = tb[DEVLINK_ATTR_ESWITCH_INLINE_MODE]; params->inline_mode = - (_NMSriovEswitchInlineMode) nla_get_u8(tb[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); + nla ? (_NMSriovEswitchInlineMode) nla_get_u8(nla) : _NM_SRIOV_ESWITCH_INLINE_MODE_UNKNOWN; + + nla = tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]; + params->encap_mode = + nla ? (_NMSriovEswitchEncapMode) nla_get_u8(nla) : _NM_SRIOV_ESWITCH_ENCAP_MODE_UNKNOWN; + return NL_OK; }