platform: netlink: add devlink support

Add support for Devlink, which is just another family of Generic Netlink
like nl80211. Implement get_eswitch_mode and set_eswitch_mode to allow
changing between legacy SRIOV and switchdev modes.

Devlink's purpose is to allow querying and configuring stuff related to
a piece of hardware but not to any of the usual Linux device classes.

For example, today's Smart NICs normally allow changing the eswitch
mode per PF, because their hardware implements one eswitch per PF, but
future models might have a single eswitch for all the physical and
virtual ports of the NIC, allowing more advanced bridge offloads.

Regarding the above example, for the moment we only support PCI network
devices with the "one eswitch per PF" model. The reason is that currently
NM only knows about netdevs, so dealing with "devlink devices" that
don't map 1-1 to a netdev would require new mechanisms to understand
what they are and their relation to the netdevs that NM manages. We
will deal with those use cases when they arise and we have more
information about the right way to support them.

(cherry picked from commit f31d29bbb7)
This commit is contained in:
Íñigo Huguet 2024-01-15 12:08:07 +01:00
parent c31f31acbf
commit 61788cf901
6 changed files with 370 additions and 0 deletions

View file

@ -672,6 +672,8 @@ src_libnm_platform_libnm_platform_la_SOURCES = \
src/libnm-platform/nmp-object.h \
src/libnm-platform/nmp-plobj.c \
src/libnm-platform/nmp-plobj.h \
src/libnm-platform/devlink/nm-devlink.c \
src/libnm-platform/devlink/nm-devlink.h \
src/libnm-platform/wifi/nm-wifi-utils-nl80211.c \
src/libnm-platform/wifi/nm-wifi-utils-nl80211.h \
src/libnm-platform/wifi/nm-wifi-utils-private.h \

View file

@ -0,0 +1,340 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2024 Red Hat, Inc.
*/
#include "libnm-glib-aux/nm-default-glib-i18n-lib.h"
#include "nm-devlink.h"
#include <linux/if.h>
#include <linux/devlink.h>
#include "libnm-log-core/nm-logging.h"
#include "libnm-platform/nm-netlink.h"
#include "libnm-platform/nm-platform.h"
#include "libnm-platform/nm-platform-utils.h"
/*
 * Logging glue for this file.
 *
 * _NMLOG() expects a variable named `self` (an NMDevlink pointer, possibly
 * NULL) to be in scope at the call site. When `self` is non-NULL, its ifindex
 * is resolved to an interface name with nmp_utils_if_indextoname(); that name
 * is passed to nm_log() and is also embedded in the message prefix after
 * _NMLOG_PREFIX_NAME.
 * NOTE(review): the exact prefix layout depends on NM_PRINT_FMT_QUOTED, which
 * is defined elsewhere - presumably "devlink (<ifname>): ".
 */
#define _NMLOG_PREFIX_NAME "devlink"
#define _NMLOG_DOMAIN LOGD_PLATFORM | LOGD_DEVICE
#define _NMLOG(level, ...) \
G_STMT_START \
{ \
char _ifname_buf[IFNAMSIZ]; \
const char *_ifname = self ? nmp_utils_if_indextoname(self->ifindex, _ifname_buf) : NULL; \
\
nm_log((level), \
_NMLOG_DOMAIN, \
_ifname ?: NULL, \
NULL, \
"%s%s%s%s: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \
_NMLOG_PREFIX_NAME, \
NM_PRINT_FMT_QUOTED(_ifname, " (", _ifname, ")", "") \
_NM_UTILS_MACRO_REST(__VA_ARGS__)); \
} \
G_STMT_END
/*
 * States of the per-request result variable shared with the netlink callbacks
 * in devlink_send_and_recv(): it starts as PENDING, the ack/finish callbacks
 * set it to OK, and the error callback stores the error code from the kernel
 * response instead (treated as a failure when negative).
 */
#define CB_RESULT_PENDING 0
#define CB_RESULT_OK 1
/* State needed to issue devlink requests on behalf of one network device. */
struct _NMDevlink {
/* platform instance; used to look up the genl family id and udev properties */
NMPlatform *plat;
/* generic netlink socket on which requests are sent and replies received */
struct nl_sock *genl_sock_sync;
/* family id obtained for NMP_GENL_FAMILY_TYPE_DEVLINK in nm_devlink_new() */
guint16 genl_family_id;
/* ifindex of the netdev this instance was created for */
int ifindex;
};
/**
 * nm_devlink_new:
 * @platform: the #NMPlatform that will use this #NMDevlink instance
 * @genl_sock_sync: the generic netlink socket (it will be used synchronously)
 * @ifindex: the kernel's ifindex of the netdev that corresponds to the
 *   devlink device
 *
 * Allocate an #NMDevlink instance for issuing devlink requests about one
 * specific device. The @platform and @genl_sock_sync pointers are stored
 * as-is, and the devlink generic netlink family id is looked up via @platform
 * at construction time.
 *
 * Returns: (transfer full): the newly allocated #NMDevlink
 */
NMDevlink *
nm_devlink_new(NMPlatform *platform, struct nl_sock *genl_sock_sync, int ifindex)
{
    NMDevlink *devlink;

    devlink  = g_new(NMDevlink, 1);
    *devlink = (NMDevlink){
        .plat           = platform,
        .genl_sock_sync = genl_sock_sync,
        .genl_family_id = nm_platform_genl_get_family_id(platform, NMP_GENL_FAMILY_TYPE_DEVLINK),
        .ifindex        = ifindex,
    };

    return devlink;
}
/**
 * nm_devlink_get_dev_identifier:
 * @self: the #NMDevlink
 * @out_bus: (out): the "bus_name" part of the devlink device identifier
 * @out_addr: (out): the "bus_addr" part of the devlink device identifier
 * @error: (optional): the error location
 *
 * Get the devlink device identifier of the device for which the #NMDevlink
 * was created (the @ifindex argument of nm_devlink_new()). A devlink device is
 * identified as "bus_name/bus_addr" (e.g. "pci/0000:65:00.0"); this function
 * returns the two parts separately, as newly allocated strings.
 *
 * Note that this is only the *potential* devlink device identifier. The real
 * devlink device might not exist at all if the hardware doesn't implement
 * devlink, or if the netdev has no 1-1 corresponding devlink device (e.g.
 * because it is a VF, or because the hardware uses a "one eswitch for many
 * ports" model).
 *
 * Only PCI devices are currently supported; an error is returned for any
 * other kind of device. On failure the output arguments are left untouched.
 *
 * Returns: FALSE in case of error, TRUE otherwise
 */
gboolean
nm_devlink_get_dev_identifier(NMDevlink *self, char **out_bus, char **out_addr, GError **error)
{
    NMPUtilsEthtoolDriverInfo drv_info;
    char                      ifname_buf[IFNAMSIZ];
    const char               *udev_bus;

    nm_assert(out_bus != NULL);
    nm_assert(out_addr != NULL);
    nm_assert(!error || !*error);

    /* The bus type comes from udev... */
    if (!nm_platform_link_get_udev_property(self->plat, self->ifindex, "ID_BUS", &udev_bus)) {
        g_set_error(error,
                    NM_UTILS_ERROR,
                    NM_UTILS_ERROR_UNKNOWN,
                    "Can't get udev info for device '%s'",
                    nmp_utils_if_indextoname(self->ifindex, ifname_buf));
        return FALSE;
    }

    if (!nm_streq0(udev_bus, "pci")) {
        g_set_error(error,
                    NM_UTILS_ERROR,
                    NM_UTILS_ERROR_UNKNOWN,
                    "Devlink is only supported for PCI but device '%s' has bus name '%s'",
                    nmp_utils_if_indextoname(self->ifindex, ifname_buf),
                    udev_bus);
        return FALSE;
    }

    /* ...while the bus address is taken from the ethtool driver info. */
    if (!nmp_utils_ethtool_get_driver_info(self->ifindex, &drv_info)) {
        g_set_error(error,
                    NM_UTILS_ERROR,
                    NM_UTILS_ERROR_UNKNOWN,
                    "Can't get ethtool driver info for device '%s'",
                    nmp_utils_if_indextoname(self->ifindex, ifname_buf));
        return FALSE;
    }

    *out_bus  = g_strdup("pci");
    *out_addr = g_strdup(drv_info._private_bus_info);
    return TRUE;
}
/*
 * Allocate a netlink message and add a generic netlink header to it for the
 * devlink family of @self, with command @cmd and netlink flags @flags.
 *
 * Returns the new message (owned by the caller), or NULL if the allocation
 * failed.
 */
static struct nl_msg *
devlink_alloc_msg(NMDevlink *self, uint8_t cmd, uint16_t flags)
{
    nm_auto_nlmsg struct nl_msg *new_msg = NULL;

    new_msg = nlmsg_alloc(0);
    if (new_msg)
        genlmsg_put(new_msg, NL_AUTO_PORT, NL_AUTO_SEQ, self->genl_family_id, 0, flags, cmd, 0);

    /* Stealing a NULL pointer simply yields NULL. */
    return g_steal_pointer(&new_msg);
}
/*
 * ACK callback: @data points to the request's result variable, which is
 * marked as successfully completed. Returns NL_STOP.
 */
static int
ack_cb_handler(const struct nl_msg *msg, void *data)
{
    *((int *) data) = CB_RESULT_OK;
    return NL_STOP;
}
/*
 * Finish callback: @data points to the request's result variable, which is
 * marked as successfully completed. Returns NL_SKIP.
 */
static int
finish_cb_handler(const struct nl_msg *msg, void *data)
{
    *((int *) data) = CB_RESULT_OK;
    return NL_SKIP;
}
static int
err_cb_handler(const struct sockaddr_nl *nla, const struct nlmsgerr *err, void *data)
{
void **args = data;
NMDevlink *self = args[0];
int *result = args[1];
char **err_msg = args[2];
const char *extack_msg = NULL;
*result = err->error;
nlmsg_parse_error(nlmsg_undata(err), &extack_msg);
_LOGT("error response (%d) %s", err->error, extack_msg ?: nm_strerror(err->error));
if (err_msg)
*err_msg = g_strdup(extack_msg ?: nm_strerror(err->error));
return NL_SKIP;
}
/*
 * devlink_send_and_recv:
 * @self: the #NMDevlink
 * @msg: the request to send (must not be NULL)
 * @valid_handler: (optional): callback invoked for valid response messages
 * @valid_data: user data passed to @valid_handler
 * @err_msg: (out) (optional) (transfer full): location for a newly allocated
 *   error description; set to NULL on entry and filled in on failure. Free it
 *   with g_free().
 *
 * Send @msg on the synchronous generic netlink socket and keep receiving
 * until one of the ack/finish/error callbacks has recorded a result, or
 * until receiving fails with anything other than -EAGAIN.
 *
 * Returns: a negative error code on failure (either from sending/receiving
 *   or the error code reported in the response), a non-negative value
 *   otherwise.
 */
static int
devlink_send_and_recv(NMDevlink      *self,
                      struct nl_msg  *msg,
                      int (*valid_handler)(const struct nl_msg *, void *),
                      void           *valid_data,
                      char          **err_msg)
{
    int                nle;
    int                cb_result = CB_RESULT_PENDING;
    void              *err_arg[] = {self, &cb_result, err_msg};
    const struct nl_cb cb        = {
        .err_cb     = err_cb_handler,
        .err_arg    = err_arg,
        .finish_cb  = finish_cb_handler,
        .finish_arg = &cb_result,
        .ack_cb     = ack_cb_handler,
        .ack_arg    = &cb_result,
        .valid_cb   = valid_handler,
        .valid_arg  = valid_data,
    };

    g_return_val_if_fail(msg != NULL, -ENOMEM);

    if (err_msg)
        *err_msg = NULL;

    nle = nl_send_auto(self->genl_sock_sync, msg);
    if (nle < 0)
        goto out;

    /* The callbacks move cb_result away from PENDING once the reply is
     * complete (ack/finish) or an error response was received. */
    while (cb_result == CB_RESULT_PENDING) {
        nle = nl_recvmsgs(self->genl_sock_sync, &cb);
        if (nle < 0 && nle != -EAGAIN) {
            _LOGW("nl_recvmsgs() error: (%d) %s", nle, nm_strerror(nle));
            break;
        }
    }

out:
    /* Provide a generic description if the error callback didn't set one.
     * Use g_strdup() so the string comes from the same allocator as the one
     * set by err_cb_handler() and can be released with g_free() by callers. */
    if (nle < 0 && err_msg && *err_msg == NULL)
        *err_msg = g_strdup(nm_strerror(nle));

    /* Transport succeeded but the kernel answered with an error. */
    if (nle >= 0 && cb_result < 0)
        nle = cb_result;
    return nle;
}
/*
 * Valid-message callback for DEVLINK_CMD_ESWITCH_GET replies. @data points to
 * an `enum devlink_eswitch_mode` that receives the value of the
 * DEVLINK_ATTR_ESWITCH_MODE attribute.
 *
 * Returns NL_OK when the attribute was found and stored; NL_SKIP (leaving
 * *data untouched) when the attributes can't be parsed or the mode attribute
 * is missing.
 */
static int
devlink_parse_eswitch_mode(const struct nl_msg *msg, void *data)
{
    static const struct nla_policy policy[] = {
        [DEVLINK_ATTR_ESWITCH_MODE] = {.type = NLA_U16},
    };
    struct nlattr             *attrs[G_N_ELEMENTS(policy)];
    struct genlmsghdr         *genl_hdr = nlmsg_data(nlmsg_hdr(msg));
    enum devlink_eswitch_mode *out_mode = data;

    if (nla_parse_arr(attrs, genlmsg_attrdata(genl_hdr, 0), genlmsg_attrlen(genl_hdr, 0), policy) < 0
        || !attrs[DEVLINK_ATTR_ESWITCH_MODE])
        return NL_SKIP;

    *out_mode = nla_get_u16(attrs[DEVLINK_ATTR_ESWITCH_MODE]);
    return NL_OK;
}
/**
 * nm_devlink_get_eswitch_mode:
 * @self: the #NMDevlink
 * @error: the error location
 *
 * Get the eswitch mode of the device related to the #NMDevlink instance. Note
 * that this might be unsupported by the device (see
 * nm_devlink_get_dev_identifier()).
 *
 * An error is also returned if the request is acknowledged by the kernel but
 * the response doesn't carry a parsable eswitch mode attribute.
 *
 * Returns: the eswitch mode of the device (an `enum devlink_eswitch_mode`
 *   value), or <0 in case of error
 */
int
nm_devlink_get_eswitch_mode(NMDevlink *self, GError **error)
{
    /* Marker for "the parse callback never stored a mode". The callback only
     * ever stores a 16-bit attribute value, which cannot equal this marker
     * (assuming the enum type is wider than 16 bits). */
    const enum devlink_eswitch_mode mode_unset   = (enum devlink_eswitch_mode) -1;
    nm_auto_nlmsg struct nl_msg    *msg          = NULL;
    gs_free char                   *bus          = NULL;
    gs_free char                   *addr         = NULL;
    enum devlink_eswitch_mode       eswitch_mode = mode_unset;
    gs_free char                   *err_msg      = NULL;
    int                             rc;

    if (!nm_devlink_get_dev_identifier(self, &bus, &addr, error))
        return -1;

    msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_GET, 0);
    NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, bus);
    NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, addr);

    rc = devlink_send_and_recv(self, msg, devlink_parse_eswitch_mode, &eswitch_mode, &err_msg);
    if (rc < 0) {
        g_set_error(error,
                    NM_UTILS_ERROR,
                    NM_UTILS_ERROR_UNKNOWN,
                    "devlink: get-eswitch-mode: failed (%d) %s",
                    rc,
                    err_msg);
        return rc;
    }

    /* The exchange can complete successfully without the parse callback
     * having stored anything (it skips replies it can't parse). Don't return
     * an indeterminate value in that case. */
    if (eswitch_mode == mode_unset) {
        g_set_error(error,
                    NM_UTILS_ERROR,
                    NM_UTILS_ERROR_UNKNOWN,
                    "devlink: get-eswitch-mode: no eswitch mode in the response");
        return -ENODATA;
    }

    _LOGD("get-eswitch-mode: success");
    return (int) eswitch_mode;

nla_put_failure:
    g_return_val_if_reached(-1);
}
/**
 * nm_devlink_set_eswitch_mode:
 * @self: the #NMDevlink
 * @mode: the eswitch mode to set
 * @error: the error location
 *
 * Set the eswitch mode of the device related to the #NMDevlink instance. Note
 * that this might be unsupported by the device (see
 * nm_devlink_get_dev_identifier()).
 *
 * Returns: FALSE in case of error, TRUE otherwise
 */
gboolean
nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GError **error)
{
    nm_auto_nlmsg struct nl_msg *msg      = NULL;
    gs_free char                *dev_bus  = NULL;
    gs_free char                *dev_addr = NULL;
    gs_free char                *reason   = NULL;
    int                          nle;

    if (!nm_devlink_get_dev_identifier(self, &dev_bus, &dev_addr, error))
        return FALSE;

    /* Request: device identifier ("bus/addr") plus the desired mode. */
    msg = devlink_alloc_msg(self, DEVLINK_CMD_ESWITCH_SET, 0);
    NLA_PUT_STRING(msg, DEVLINK_ATTR_BUS_NAME, dev_bus);
    NLA_PUT_STRING(msg, DEVLINK_ATTR_DEV_NAME, dev_addr);
    NLA_PUT_U16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);

    nle = devlink_send_and_recv(self, msg, NULL, NULL, &reason);
    if (nle >= 0) {
        _LOGD("set-eswitch-mode: success");
        return TRUE;
    }

    g_set_error(error,
                NM_UTILS_ERROR,
                NM_UTILS_ERROR_UNKNOWN,
                "devlink: set-eswitch-mode: failed (%d) %s",
                nle,
                reason);
    return FALSE;

nla_put_failure:
    g_return_val_if_reached(FALSE);
}

View file

@ -0,0 +1,22 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
* Copyright (C) 2024 Red Hat, Inc.
*/
#ifndef __NMP_DEVLINK_H__
#define __NMP_DEVLINK_H__
/* Needed for enum devlink_eswitch_mode used in the prototypes below. */
#include <linux/devlink.h>
/* Forward declarations: only pointers to these types are used here. */
struct nl_sock;
typedef struct _NMPlatform NMPlatform;
typedef struct _NMDevlink NMDevlink;
/* Create an NMDevlink instance for the netdev with the given ifindex. */
NMDevlink *nm_devlink_new(NMPlatform *platform, struct nl_sock *genl_sock_sync, int ifindex);
/* Get the "bus_name" and "bus_addr" parts of the (potential) devlink device
 * identifier; returns FALSE and sets @error on failure. */
gboolean
nm_devlink_get_dev_identifier(NMDevlink *self, char **out_bus, char **out_addr, GError **error);
/* Query the eswitch mode; returns the mode, or <0 (with @error set) on failure. */
int nm_devlink_get_eswitch_mode(NMDevlink *self, GError **error);
/* Change the eswitch mode; returns FALSE and sets @error on failure. */
gboolean
nm_devlink_set_eswitch_mode(NMDevlink *self, enum devlink_eswitch_mode mode, GError **error);
#endif /* __NMP_DEVLINK_H__ */

View file

@ -12,6 +12,7 @@ libnm_platform = static_library(
'nmp-netns.c',
'nmp-object.c',
'nmp-plobj.c',
'devlink/nm-devlink.c',
'wifi/nm-wifi-utils-nl80211.c',
'wifi/nm-wifi-utils.c',
'wpan/nm-wpan-utils.c',

View file

@ -452,6 +452,10 @@ _nm_platform_kernel_support_init(NMPlatformKernelSupportType type, int value)
/*****************************************************************************/
const NMPGenlFamilyInfo nmp_genl_family_infos[_NMP_GENL_FAMILY_TYPE_NUM] = {
[NMP_GENL_FAMILY_TYPE_DEVLINK] =
{
.name = "devlink",
},
[NMP_GENL_FAMILY_TYPE_ETHTOOL] =
{
.name = "ethtool",

View file

@ -1084,6 +1084,7 @@ nm_platform_kernel_support_get(NMPlatformKernelSupportType type)
}
typedef enum {
NMP_GENL_FAMILY_TYPE_DEVLINK,
NMP_GENL_FAMILY_TYPE_ETHTOOL,
NMP_GENL_FAMILY_TYPE_MPTCP_PM,
NMP_GENL_FAMILY_TYPE_NL80211,