diff --git a/man/systemd.netdev.xml b/man/systemd.netdev.xml
index e4e7e611e77..255b85f4049 100644
--- a/man/systemd.netdev.xml
+++ b/man/systemd.netdev.xml
@@ -189,6 +189,9 @@
batadv
B.A.T.M.A.N. Advanced is a routing protocol for multi-hop mobile ad-hoc networks which operates on layer 2.
+
+ ipoib
+ An IP over Infiniband subinterface.
@@ -2125,6 +2128,49 @@
+
+ [IPoIB] Section Options
+ The [IPoIB] section only applies for netdevs of kind ipoib and accepts the
+ following keys:
+
+
+
+ PartitionKey=
+
+ Takes an integer in the range 1…0xffff, except for 0x8000. Defaults to unset, and the
+ kernel's default is used.
+
+
+
+
+ Mode=
+
+ Takes one of the special values datagram or
+ connected. Defaults to unset, and the kernel's default is used.
+
+ When datagram, the Infiniband unreliable datagram (UD) transport is
+ used, and so the interface MTU is equal to the IB L2 MTU minus the IPoIB encapsulation
+ header (4 bytes). For example, in a typical IB fabric with a 2K MTU, the IPoIB MTU will be
+ 2048 - 4 = 2044 bytes.
+
+ When connected, the Infiniband reliable connected (RC) transport is
+ used. Connected mode takes advantage of the connected nature of the IB transport and allows
+ an MTU up to the maximal IP packet size of 64K, which reduces the number of IP packets needed
+ for handling large UDP datagrams, TCP segments, etc., and increases the performance for large
+ messages.
+
+
+
+
+ IgnoreUserspaceMulticastGroups=
+
+ Takes a boolean value. When true, the kernel ignores multicast groups handled by
+ userspace. Defaults to unset, and the kernel's default is used.
+
+
+
+
+
Examples
diff --git a/man/systemd.network.xml b/man/systemd.network.xml
index 50367ecdcd7..e8e01f9094c 100644
--- a/man/systemd.network.xml
+++ b/man/systemd.network.xml
@@ -902,6 +902,7 @@ Table=1234
+ IPoIB=
IPVLAN=
IPVTAP=
L2TP=
@@ -913,8 +914,8 @@ Table=1234
VXLAN=
Xfrm=
- The name of an IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN, VXLAN, or
- Xfrm to be created on the link. See
+ The name of an IPoIB, IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN,
+ VXLAN, or Xfrm to be created on the link. See
systemd.netdev5.
This option may be specified more than once.
@@ -3226,6 +3227,15 @@ Token=prefixstable:2002:da8:1::
+
+ [IPoIB] Section Options
+ The [IPoIB] section manages the IP over Infiniband and accepts the following keys:
+
+
+
+
+
+
[QDisc] Section Options
The [QDisc] section manages the traffic control queueing discipline (qdisc).
diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c
index 874fffe9d93..182c74ed3d8 100644
--- a/src/libsystemd/sd-netlink/netlink-message.c
+++ b/src/libsystemd/sd-netlink/netlink-message.c
@@ -147,7 +147,7 @@ sd_netlink_message *sd_netlink_message_unref(sd_netlink_message *m) {
int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *type) {
assert_return(m, -EINVAL);
- assert_return(type, -EINVAL);
+ assert_return(type != 0, -EINVAL);
*type = m->hdr->nlmsg_type;
@@ -156,7 +156,7 @@ int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *type) {
int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags) {
assert_return(m, -EINVAL);
- assert_return(flags, -EINVAL);
+ assert_return(flags != 0, -EINVAL);
m->hdr->nlmsg_flags = flags;
diff --git a/src/libsystemd/sd-netlink/netlink-types-rtnl.c b/src/libsystemd/sd-netlink/netlink-types-rtnl.c
index 624422eea27..167bbc5ccfc 100644
--- a/src/libsystemd/sd-netlink/netlink-types-rtnl.c
+++ b/src/libsystemd/sd-netlink/netlink-types-rtnl.c
@@ -210,7 +210,7 @@ static const NLType rtnl_link_info_data_geneve_types[] = {
[IFLA_GENEVE_DF] = { .type = NETLINK_TYPE_U8 },
};
-static const NLType rtnl_link_info_data_gre_types[] = {
+static const NLType rtnl_link_info_data_gre_types[] = {
[IFLA_GRE_LINK] = { .type = NETLINK_TYPE_U32 },
[IFLA_GRE_IFLAGS] = { .type = NETLINK_TYPE_U16 },
[IFLA_GRE_OFLAGS] = { .type = NETLINK_TYPE_U16 },
@@ -237,6 +237,12 @@ static const NLType rtnl_link_info_data_gre_types[] = {
[IFLA_GRE_ERSPAN_HWID] = { .type = NETLINK_TYPE_U16 },
};
+static const NLType rtnl_link_info_data_ipoib_types[] = { /* IFLA_INFO_DATA attribute types for links of kind "ipoib" */
+ [IFLA_IPOIB_PKEY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPOIB_MODE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPOIB_UMCAST] = { .type = NETLINK_TYPE_U16 },
+};
+
/* IFLA_IPTUN_ attributes are used in ipv4/ipip.c, ipv6/ip6_tunnel.c, and ipv6/sit.c. And unfortunately,
* IFLA_IPTUN_FLAGS is used with different types, ugh... */
#define DEFINE_IPTUN_TYPES(name, flags_type) \
@@ -410,9 +416,7 @@ static const NLTypeSystemUnionElement rtnl_link_info_data_type_systems[] = {
{ .name = "ip6gre", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_gre), },
{ .name = "ip6gretap", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_gre), },
{ .name = "ip6tnl", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_iptun), },
-/*
{ .name = "ipoib", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_ipoib), },
-*/
{ .name = "ipip", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_iptun), },
{ .name = "ipvlan", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_ipvlan), },
{ .name = "ipvtap", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_ipvlan), },
diff --git a/src/network/meson.build b/src/network/meson.build
index cfa16a8ecf0..c1cf227ffcb 100644
--- a/src/network/meson.build
+++ b/src/network/meson.build
@@ -13,6 +13,8 @@ sources = files('''
netdev/dummy.h
netdev/ifb.c
netdev/ifb.h
+ netdev/ipoib.c
+ netdev/ipoib.h
netdev/ipvlan.c
netdev/ipvlan.h
netdev/macvlan.c
diff --git a/src/network/netdev/ipoib.c b/src/network/netdev/ipoib.c
new file mode 100644
index 00000000000..c3f583fc467
--- /dev/null
+++ b/src/network/netdev/ipoib.c
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include
+#include
+
+#include "ipoib.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "string-table.h"
+
+assert_cc((int) IP_OVER_INFINIBAND_MODE_DATAGRAM == (int) IPOIB_MODE_DATAGRAM);
+assert_cc((int) IP_OVER_INFINIBAND_MODE_CONNECTED == (int) IPOIB_MODE_CONNECTED);
+
+/* Initializes the optional [IPoIB] settings of the netdev to their "unset" markers. */
+static void netdev_ipoib_init(NetDev *netdev) {
+        IPoIB *t;
+
+        assert(netdev);
+
+        t = IPOIB(netdev);
+        assert(t);
+
+        t->umcast = -1; /* negative: IgnoreUserspaceMulticastGroups= not configured */
+        t->mode = _IP_OVER_INFINIBAND_MODE_INVALID;
+}
+
+/* Adds the configured [IPoIB] settings to the netlink message 'm'; unset ones are left out. */
+static int netdev_ipoib_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+        IPoIB *t;
+        int r;
+
+        assert(netdev);
+        assert(link);
+        assert(m);
+
+        t = IPOIB(netdev);
+        assert(t);
+
+        if (t->pkey != 0) { /* 0 is the "unset" marker of the partition key */
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_PKEY, t->pkey);
+                if (r < 0)
+                        return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_PKEY attribute: %m");
+        }
+
+        if (t->mode >= 0) {
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_MODE, t->mode);
+                if (r < 0)
+                        return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_MODE attribute: %m");
+        }
+
+        if (t->umcast >= 0) {
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_UMCAST, t->umcast);
+                if (r < 0)
+                        return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_UMCAST attribute: %m");
+        }
+
+        return 0;
+}
+
+/* Fills 'm' with the [IPoIB] settings found in link->network; unset ones are left out. */
+int ipoib_set_netlink_message(Link *link, sd_netlink_message *m) {
+        int r;
+
+        assert(link);
+        assert(link->network);
+        assert(m);
+
+        r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK);
+        if (r < 0)
+                return log_link_debug_errno(link, r, "Could not set netlink flags: %m");
+
+        /* The attributes are nested as IFLA_LINKINFO -> IFLA_INFO_DATA (selected by the link kind). */
+        r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+        if (r < 0)
+                return log_link_debug_errno(link, r, "Failed to open IFLA_LINKINFO container: %m");
+        r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, link->kind);
+        if (r < 0)
+                return log_link_debug_errno(link, r, "Could not open IFLA_INFO_DATA container: %m");
+
+        if (link->network->ipoib_mode >= 0) {
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_MODE, link->network->ipoib_mode);
+                if (r < 0)
+                        return log_link_debug_errno(link, r, "Could not append IFLA_IPOIB_MODE attribute: %m");
+        }
+
+        if (link->network->ipoib_umcast >= 0) {
+                r = sd_netlink_message_append_u16(m, IFLA_IPOIB_UMCAST, link->network->ipoib_umcast);
+                if (r < 0)
+                        return log_link_debug_errno(link, r, "Could not append IFLA_IPOIB_UMCAST attribute: %m");
+        }
+
+        r = sd_netlink_message_close_container(m);
+        if (r < 0)
+                return log_link_debug_errno(link, r, "Failed to close IFLA_INFO_DATA container: %m");
+        r = sd_netlink_message_close_container(m);
+        if (r < 0)
+                return log_link_debug_errno(link, r, "Failed to close IFLA_LINKINFO container: %m");
+
+        return 0;
+}
+
+static const char * const ipoib_mode_table[_IP_OVER_INFINIBAND_MODE_MAX] = {
+ [IP_OVER_INFINIBAND_MODE_DATAGRAM] = "datagram",
+ [IP_OVER_INFINIBAND_MODE_CONNECTED] = "connected",
+};
+/* Strings accepted by Mode=; a private from-string lookup and the config parser are defined from this table. */
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(ipoib_mode, IPoIBMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipoib_mode, ipoib_mode, IPoIBMode, "Failed to parse IPoIB mode");
+
+/* Parser for PartitionKey=: writes a uint16_t through 'data'. Invalid values are logged and ignored. */
+int config_parse_ipoib_pkey(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint16_t v, *out = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (isempty(rvalue)) {
+                *out = 0; /* 0 means unset. */
+                return 0;
+        }
+
+        r = safe_atou16(rvalue, &v);
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r,
+                           "Failed to parse IPoIB pkey '%s', ignoring assignment: %m", rvalue);
+                return 0;
+        }
+
+        /* 0 is reserved as the "unset" marker, and 0x8000 is refused as well. */
+        if (v == 0 || v == 0x8000) {
+                log_syntax(unit, LOG_WARNING, filename, line, 0,
+                           "IPoIB pkey cannot be 0 nor 0x8000, ignoring assignment: %s", rvalue);
+                return 0;
+        }
+        *out = v;
+        return 0;
+}
+
+/* NetDev vtable for kind "ipoib": created as a stacked netdev, InfiniBand iftype, generate_mac enabled. */
+const NetDevVTable ipoib_vtable = {
+ .object_size = sizeof(IPoIB),
+ .sections = NETDEV_COMMON_SECTIONS "IPoIB\0",
+ .init = netdev_ipoib_init,
+ .fill_message_create = netdev_ipoib_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .iftype = ARPHRD_INFINIBAND,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/ipoib.h b/src/network/netdev/ipoib.h
new file mode 100644
index 00000000000..415d3b107c8
--- /dev/null
+++ b/src/network/netdev/ipoib.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include
+
+#include "conf-parser.h"
+#include "netdev.h"
+
+typedef enum IPoIBMode {
+ IP_OVER_INFINIBAND_MODE_DATAGRAM, /* Mode=datagram; asserted equal to IPOIB_MODE_DATAGRAM in ipoib.c */
+ IP_OVER_INFINIBAND_MODE_CONNECTED, /* Mode=connected; asserted equal to IPOIB_MODE_CONNECTED in ipoib.c */
+ _IP_OVER_INFINIBAND_MODE_MAX,
+ _IP_OVER_INFINIBAND_MODE_INVALID = -EINVAL, /* negative: also used as the "not configured" value */
+} IPoIBMode;
+
+typedef struct IPoIB {
+ NetDev meta;
+
+ uint16_t pkey; /* PartitionKey=; 0 means unset */
+ IPoIBMode mode; /* Mode=; negative means unset */
+ int umcast; /* IgnoreUserspaceMulticastGroups= (tristate); negative means unset */
+} IPoIB;
+
+DEFINE_NETDEV_CAST(IPOIB, IPoIB);
+extern const NetDevVTable ipoib_vtable;
+
+int ipoib_set_netlink_message(Link *link, sd_netlink_message *m);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_pkey);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_mode);
diff --git a/src/network/netdev/netdev-gperf.gperf b/src/network/netdev/netdev-gperf.gperf
index 37a0d9fa5d5..a948ec2c8a0 100644
--- a/src/network/netdev/netdev-gperf.gperf
+++ b/src/network/netdev/netdev-gperf.gperf
@@ -11,6 +11,7 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
#include "conf-parser.h"
#include "fou-tunnel.h"
#include "geneve.h"
+#include "ipoib.h"
#include "ipvlan.h"
#include "l2tp-tunnel.h"
#include "macsec.h"
@@ -253,3 +254,6 @@ BatmanAdvanced.GatewayBandwidthUp, config_parse_badadv_bandwidth,
BatmanAdvanced.HopPenalty, config_parse_uint8, 0, offsetof(BatmanAdvanced, hop_penalty)
BatmanAdvanced.OriginatorIntervalSec, config_parse_sec, 0, offsetof(BatmanAdvanced, originator_interval)
BatmanAdvanced.RoutingAlgorithm, config_parse_batadv_routing_algorithm, 0, offsetof(BatmanAdvanced, routing_algorithm)
+IPoIB.PartitionKey, config_parse_ipoib_pkey, 0, offsetof(IPoIB, pkey)
+IPoIB.Mode, config_parse_ipoib_mode, 0, offsetof(IPoIB, mode)
+IPoIB.IgnoreUserspaceMulticastGroups, config_parse_tristate, 0, offsetof(IPoIB, umcast)
diff --git a/src/network/netdev/netdev.c b/src/network/netdev/netdev.c
index 5d88cbdf5f1..f6732925940 100644
--- a/src/network/netdev/netdev.c
+++ b/src/network/netdev/netdev.c
@@ -18,6 +18,7 @@
#include "fou-tunnel.h"
#include "geneve.h"
#include "ifb.h"
+#include "ipoib.h"
#include "ipvlan.h"
#include "l2tp-tunnel.h"
#include "list.h"
@@ -64,6 +65,7 @@ const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX] = {
[NETDEV_KIND_IP6GRETAP] = &ip6gretap_vtable,
[NETDEV_KIND_IP6TNL] = &ip6tnl_vtable,
[NETDEV_KIND_IPIP] = &ipip_vtable,
+ [NETDEV_KIND_IPOIB] = &ipoib_vtable,
[NETDEV_KIND_IPVLAN] = &ipvlan_vtable,
[NETDEV_KIND_IPVTAP] = &ipvtap_vtable,
[NETDEV_KIND_L2TP] = &l2tptnl_vtable,
@@ -103,6 +105,7 @@ static const char* const netdev_kind_table[_NETDEV_KIND_MAX] = {
[NETDEV_KIND_IP6GRETAP] = "ip6gretap",
[NETDEV_KIND_IP6TNL] = "ip6tnl",
[NETDEV_KIND_IPIP] = "ipip",
+ [NETDEV_KIND_IPOIB] = "ipoib",
[NETDEV_KIND_IPVLAN] = "ipvlan",
[NETDEV_KIND_IPVTAP] = "ipvtap",
[NETDEV_KIND_L2TP] = "l2tp",
@@ -391,17 +394,26 @@ int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *message) {
#define HASH_KEY SD_ID128_MAKE(52,e1,45,bd,00,6f,29,96,21,c6,30,6d,83,71,04,48)
-int netdev_generate_hw_addr(NetDev *netdev, const char *name, struct hw_addr_data *hw_addr) {
+int netdev_generate_hw_addr(
+ NetDev *netdev,
+ Link *parent,
+ const char *name,
+ const struct hw_addr_data *hw_addr,
+ struct hw_addr_data *ret) {
+ /* Stores the address to request at creation time in 'ret'; a zero-length 'ret' means "do not set one". */
+ struct hw_addr_data a = HW_ADDR_NULL;
bool warn_invalid = false;
- struct hw_addr_data a;
int r;
assert(netdev);
assert(name);
assert(hw_addr);
+ assert(ret);
- if (hw_addr_equal(hw_addr, &HW_ADDR_NONE))
+ if (hw_addr_equal(hw_addr, &HW_ADDR_NONE)) {
+ *ret = HW_ADDR_NULL;
return 0;
+ }
if (hw_addr->length == 0) {
uint64_t result;
@@ -409,42 +421,66 @@ int netdev_generate_hw_addr(NetDev *netdev, const char *name, struct hw_addr_dat
/* HardwareAddress= is not specified. */
if (!NETDEV_VTABLE(netdev)->generate_mac)
- return 0;
+ goto finalize;
- if (NETDEV_VTABLE(netdev)->iftype != ARPHRD_ETHER)
- return 0;
+ if (!IN_SET(NETDEV_VTABLE(netdev)->iftype, ARPHRD_ETHER, ARPHRD_INFINIBAND))
+ goto finalize;
r = net_get_unique_predictable_data_from_name(name, &HASH_KEY, &result);
if (r < 0) {
log_netdev_warning_errno(netdev, r,
"Failed to generate persistent MAC address, ignoring: %m");
- return 0;
+ goto finalize;
}
a.length = arphrd_to_hw_addr_len(NETDEV_VTABLE(netdev)->iftype);
- assert(a.length <= sizeof(result));
- memcpy(a.bytes, &result, a.length);
- if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) {
- log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
- "Failed to generate persistent MAC address, ignoring: %m");
- return 0;
+ switch (NETDEV_VTABLE(netdev)->iftype) {
+ case ARPHRD_ETHER:
+ assert(a.length <= sizeof(result));
+ memcpy(a.bytes, &result, a.length);
+
+ if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) {
+ log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to generate persistent MAC address, ignoring: %m");
+ a = HW_ADDR_NULL;
+ goto finalize;
+ }
+
+ break;
+ case ARPHRD_INFINIBAND:
+ if (result == 0) {
+ log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to generate persistent MAC address: %m");
+ a = HW_ADDR_NULL; /* a.length is already set; do not return a zero-filled address */
+ goto finalize;
+ }
+
+ assert(a.length >= sizeof(result));
+ memzero(a.bytes, a.length - sizeof(result));
+ memcpy(a.bytes + a.length - sizeof(result), &result, sizeof(result));
+ break;
+ default:
+ assert_not_reached();
}
} else {
a = *hw_addr;
warn_invalid = true;
}
- r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype, NULL, &a);
+ r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype,
+ parent ? &parent->hw_addr : NULL, &a);
if (r < 0)
return r;
- *hw_addr = a;
+finalize:
+ *ret = a;
return 0;
}
static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
_cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ struct hw_addr_data hw_addr;
int r;
assert(netdev);
@@ -470,8 +506,13 @@ static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handle
if (r < 0)
return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
- if (netdev->hw_addr.length > 0 && !hw_addr_equal(&netdev->hw_addr, &HW_ADDR_NULL)) {
- r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &netdev->hw_addr);
+ r = netdev_generate_hw_addr(netdev, link, netdev->ifname, &netdev->hw_addr, &hw_addr);
+ if (r < 0)
+ return r;
+
+ if (hw_addr.length > 0) {
+ log_netdev_debug(netdev, "Using MAC address: %s", HW_ADDR_TO_STR(&hw_addr));
+ r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &hw_addr);
if (r < 0)
return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
}
@@ -789,10 +830,6 @@ int netdev_load_one(Manager *manager, const char *filename) {
if (!netdev->filename)
return log_oom();
- r = netdev_generate_hw_addr(netdev, netdev->ifname, &netdev->hw_addr);
- if (r < 0)
- return r;
-
r = hashmap_ensure_put(&netdev->manager->netdevs, &string_hash_ops, netdev->ifname, netdev);
if (r == -ENOMEM)
return log_oom();
diff --git a/src/network/netdev/netdev.h b/src/network/netdev/netdev.h
index ed8a2e33c51..c7262f550a5 100644
--- a/src/network/netdev/netdev.h
+++ b/src/network/netdev/netdev.h
@@ -22,6 +22,7 @@
"-Bridge\0" \
"-FooOverUDP\0" \
"-GENEVE\0" \
+ "-IPoIB\0" \
"-IPVLAN\0" \
"-IPVTAP\0" \
"-L2TP\0" \
@@ -60,6 +61,7 @@ typedef enum NetDevKind {
NETDEV_KIND_IP6GRETAP,
NETDEV_KIND_IP6TNL,
NETDEV_KIND_IPIP,
+ NETDEV_KIND_IPOIB,
NETDEV_KIND_IPVLAN,
NETDEV_KIND_IPVTAP,
NETDEV_KIND_L2TP,
@@ -201,7 +203,8 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_unref);
bool netdev_is_managed(NetDev *netdev);
int netdev_get(Manager *manager, const char *name, NetDev **ret);
int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *newlink);
-int netdev_generate_hw_addr(NetDev *netdev, const char *name, struct hw_addr_data *hw_addr);
+int netdev_generate_hw_addr(NetDev *netdev, Link *link, const char *name,
+ const struct hw_addr_data *hw_addr, struct hw_addr_data *ret);
int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t cb);
int request_process_stacked_netdev(Request *req);
diff --git a/src/network/netdev/veth.c b/src/network/netdev/veth.c
index d7870d7628a..c946e81fc0a 100644
--- a/src/network/netdev/veth.c
+++ b/src/network/netdev/veth.c
@@ -10,6 +10,7 @@
#include "veth.h"
static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ struct hw_addr_data hw_addr;
Veth *v;
int r;
@@ -31,8 +32,13 @@ static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlin
return log_netdev_error_errno(netdev, r, "Failed to add netlink interface name: %m");
}
- if (v->hw_addr_peer.length > 0 && !hw_addr_equal(&v->hw_addr_peer, &HW_ADDR_NULL)) {
- r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &v->hw_addr_peer);
+ r = netdev_generate_hw_addr(netdev, NULL, v->ifname_peer, &v->hw_addr_peer, &hw_addr);
+ if (r < 0)
+ return r;
+
+ if (hw_addr.length > 0) {
+ log_netdev_debug(netdev, "Using MAC address for peer: %s", HW_ADDR_TO_STR(&hw_addr));
+ r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &hw_addr);
if (r < 0)
return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
}
@@ -52,7 +58,6 @@ static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlin
static int netdev_veth_verify(NetDev *netdev, const char *filename) {
Veth *v;
- int r;
assert(netdev);
assert(filename);
@@ -66,10 +71,6 @@ static int netdev_veth_verify(NetDev *netdev, const char *filename) {
"Veth NetDev without peer name configured in %s. Ignoring",
filename);
- r = netdev_generate_hw_addr(netdev, v->ifname_peer, &v->hw_addr_peer);
- if (r < 0)
- return r;
-
return 0;
}
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
index 4df3e19220d..266334e78ce 100644
--- a/src/network/networkd-link.c
+++ b/src/network/networkd-link.c
@@ -1101,6 +1101,10 @@ static int link_configure(Link *link) {
if (r < 0)
return r;
+ r = link_request_to_set_ipoib(link);
+ if (r < 0)
+ return r;
+
r = link_request_to_set_flags(link);
if (r < 0)
return r;
diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf
index df9721a9bcb..3b8d45c1bb5 100644
--- a/src/network/networkd-network-gperf.gperf
+++ b/src/network/networkd-network-gperf.gperf
@@ -87,6 +87,7 @@ Network.BatmanAdvanced, config_parse_ifname,
Network.Bond, config_parse_ifname, 0, offsetof(Network, bond_name)
Network.Bridge, config_parse_ifname, 0, offsetof(Network, bridge_name)
Network.VRF, config_parse_ifname, 0, offsetof(Network, vrf_name)
+Network.IPoIB, config_parse_stacked_netdev, NETDEV_KIND_IPOIB, offsetof(Network, stacked_netdev_names)
Network.IPVLAN, config_parse_stacked_netdev, NETDEV_KIND_IPVLAN, offsetof(Network, stacked_netdev_names)
Network.IPVTAP, config_parse_stacked_netdev, NETDEV_KIND_IPVTAP, offsetof(Network, stacked_netdev_names)
Network.L2TP, config_parse_stacked_netdev, NETDEV_KIND_L2TP, offsetof(Network, stacked_netdev_names)
@@ -381,6 +382,8 @@ CAN.PresumeACK, config_parse_can_control_mode,
CAN.FDNonISO, config_parse_can_control_mode, CAN_CTRLMODE_FD_NON_ISO, 0
CAN.ClassicDataLengthCode, config_parse_can_control_mode, CAN_CTRLMODE_CC_LEN8_DLC, 0
CAN.Termination, config_parse_can_termination, 0, 0
+IPoIB.Mode, config_parse_ipoib_mode, 0, offsetof(Network, ipoib_mode)
+IPoIB.IgnoreUserspaceMulticastGroups, config_parse_tristate, 0, offsetof(Network, ipoib_umcast)
QDisc.Parent, config_parse_qdisc_parent, _QDISC_KIND_INVALID, 0
QDisc.Handle, config_parse_qdisc_handle, _QDISC_KIND_INVALID, 0
BFIFO.Parent, config_parse_qdisc_parent, QDISC_KIND_BFIFO, 0
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
index 443222f6106..6651c6c04cc 100644
--- a/src/network/networkd-network.c
+++ b/src/network/networkd-network.c
@@ -476,6 +476,9 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi
.ipv6_accept_ra_start_dhcp6_client = IPV6_ACCEPT_RA_START_DHCP6_CLIENT_YES,
.can_termination = -1,
+
+ .ipoib_mode = _IP_OVER_INFINIBAND_MODE_INVALID,
+ .ipoib_umcast = -1,
};
r = config_parse_many(
@@ -852,6 +855,7 @@ int config_parse_stacked_netdev(
assert(rvalue);
assert(data);
assert(IN_SET(kind,
+ NETDEV_KIND_IPOIB,
NETDEV_KIND_IPVLAN,
NETDEV_KIND_IPVTAP,
NETDEV_KIND_L2TP,
diff --git a/src/network/networkd-network.h b/src/network/networkd-network.h
index 2fdd4994c4c..9304ceaafbf 100644
--- a/src/network/networkd-network.h
+++ b/src/network/networkd-network.h
@@ -11,6 +11,7 @@
#include "condition.h"
#include "conf-parser.h"
#include "hashmap.h"
+#include "ipoib.h"
#include "net-condition.h"
#include "netdev.h"
#include "networkd-bridge-vlan.h"
@@ -286,6 +287,10 @@ struct Network {
uint16_t can_termination;
bool can_termination_set;
+ /* IPoIB support */
+ IPoIBMode ipoib_mode;
+ int ipoib_umcast;
+
/* sysctl settings */
AddressFamily ip_forward;
int ipv4_accept_local;
diff --git a/src/network/networkd-setlink.c b/src/network/networkd-setlink.c
index e7b39778da6..177c054f463 100644
--- a/src/network/networkd-setlink.c
+++ b/src/network/networkd-setlink.c
@@ -24,6 +24,7 @@ static const char *const set_link_operation_table[_SET_LINK_OPERATION_MAX] = {
[SET_LINK_CAN] = "CAN interface configurations",
[SET_LINK_FLAGS] = "link flags",
[SET_LINK_GROUP] = "interface group",
+ [SET_LINK_IPOIB] = "IPoIB configurations",
[SET_LINK_MAC] = "MAC address",
[SET_LINK_MASTER] = "master interface",
[SET_LINK_MTU] = "MTU",
@@ -153,6 +154,10 @@ static int link_set_group_handler(sd_netlink *rtnl, sd_netlink_message *m, Link
return set_link_handler_internal(rtnl, m, link, SET_LINK_GROUP, /* ignore = */ false, NULL);
}
+static int link_set_ipoib_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) { /* handler passed by link_request_to_set_ipoib() */
+ return set_link_handler_internal(rtnl, m, link, SET_LINK_IPOIB, /* ignore = */ true, NULL);
+}
+
static int link_set_mac_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
return set_link_handler_internal(rtnl, m, link, SET_LINK_MAC, /* ignore = */ true, get_link_default_handler);
}
@@ -236,7 +241,7 @@ static int link_configure(
r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_NEWLINK, link->master_ifindex);
if (r < 0)
return log_link_debug_errno(link, r, "Could not allocate RTM_NEWLINK message: %m");
- } else if (op == SET_LINK_CAN) {
+ } else if (IN_SET(op, SET_LINK_CAN, SET_LINK_IPOIB)) {
r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_NEWLINK, link->ifindex);
if (r < 0)
return log_link_debug_errno(link, r, "Could not allocate RTM_NEWLINK message: %m");
@@ -468,6 +473,11 @@ static int link_configure(
if (r < 0)
return log_link_debug_errno(link, r, "Could not append IFLA_ADDRESS attribute: %m");
break;
+ case SET_LINK_IPOIB:
+ r = ipoib_set_netlink_message(link, req);
+ if (r < 0)
+ return r;
+ break;
case SET_LINK_MASTER:
r = sd_netlink_message_append_u32(req, IFLA_MASTER, PTR_TO_UINT32(userdata));
if (r < 0)
@@ -508,6 +518,9 @@ static bool link_is_ready_to_call_set_link(Request *req) {
int r;
assert(req);
+ assert(req->link);
+ assert(req->link->manager);
+ assert(req->link->network);
link = req->link;
op = PTR_TO_INT(req->set_link_operation_ptr);
@@ -556,8 +569,6 @@ static bool link_is_ready_to_call_set_link(Request *req) {
case SET_LINK_MASTER: {
uint32_t m = 0;
- assert(link->network);
-
if (link->network->batadv) {
if (!netdev_is_ready(link->network->batadv))
return false;
@@ -590,6 +601,15 @@ static bool link_is_ready_to_call_set_link(Request *req) {
req->userdata = UINT32_TO_PTR(m);
break;
}
+ case SET_LINK_MTU: {
+ Request req_ipoib = {
+ .link = link,
+ .type = REQUEST_TYPE_SET_LINK,
+ .set_link_operation_ptr = INT_TO_PTR(SET_LINK_IPOIB),
+ };
+
+ return !ordered_set_contains(link->manager->request_queue, &req_ipoib);
+ }
default:
break;
}
@@ -800,6 +820,20 @@ int link_request_to_set_mac(Link *link, bool allow_retry) {
NULL);
}
+/* Requests the SET_LINK_IPOIB operation for InfiniBand links that have [IPoIB] settings configured. */
+int link_request_to_set_ipoib(Link *link) {
+        assert(link);
+        assert(link->network);
+
+        if (link->iftype != ARPHRD_INFINIBAND)
+                return 0;
+
+        /* Nothing to do unless at least one of the two settings is configured (non-negative). */
+        if (!(link->network->ipoib_mode >= 0 || link->network->ipoib_umcast >= 0))
+                return 0;
+        return link_request_set_link(link, SET_LINK_IPOIB, link_set_ipoib_handler, NULL);
+}
+
int link_request_to_set_master(Link *link) {
assert(link);
assert(link->network);
diff --git a/src/network/networkd-setlink.h b/src/network/networkd-setlink.h
index d3e9f2b9d7b..39a85a6871d 100644
--- a/src/network/networkd-setlink.h
+++ b/src/network/networkd-setlink.h
@@ -14,6 +14,7 @@ typedef enum SetLinkOperation {
SET_LINK_CAN, /* Setting CAN interface configs. */
SET_LINK_FLAGS, /* Setting IFF_NOARP or friends. */
SET_LINK_GROUP, /* Setting interface group. */
+ SET_LINK_IPOIB, /* Setting IPoIB configs. */
SET_LINK_MAC, /* Setting MAC address. */
SET_LINK_MASTER, /* Setting IFLA_MASTER. */
SET_LINK_MTU, /* Setting MTU. */
@@ -33,6 +34,7 @@ int link_request_to_set_can(Link *link);
int link_request_to_set_flags(Link *link);
int link_request_to_set_group(Link *link);
int link_request_to_set_mac(Link *link, bool allow_retry);
+int link_request_to_set_ipoib(Link *link);
int link_request_to_set_master(Link *link);
int link_request_to_set_mtu(Link *link, uint32_t mtu);
diff --git a/test/fuzz/fuzz-netdev-parser/directives.netdev b/test/fuzz/fuzz-netdev-parser/directives.netdev
index e34d16af117..f5fa2418fee 100644
--- a/test/fuzz/fuzz-netdev-parser/directives.netdev
+++ b/test/fuzz/fuzz-netdev-parser/directives.netdev
@@ -241,3 +241,7 @@ GatewayBandwithUp=
GatewayBandwidthDown=
GatewayBandwidthUp=
RoutingAlgorithm=
+[IPoIB]
+PartitionKey=
+Mode=
+IgnoreUserspaceMulticastGroups=
diff --git a/test/fuzz/fuzz-network-parser/directives.network b/test/fuzz/fuzz-network-parser/directives.network
index 5b5a4f8c60e..c1c00262341 100644
--- a/test/fuzz/fuzz-network-parser/directives.network
+++ b/test/fuzz/fuzz-network-parser/directives.network
@@ -242,6 +242,7 @@ IgnoreCarrierLoss=
KeepConfiguration=
DHCPv6PrefixDelegation=
BatmanAdvanced=
+IPoIB=
[IPv6Prefix]
Prefix=
OnLink=
@@ -286,6 +287,9 @@ Loopback=
OneShot=
PresumeACK=
ClassicDataLengthCode=
+[IPoIB]
+Mode=
+IgnoreUserspaceMulticastGroups=
[Address]
DuplicateAddressDetection=
AutoJoin=