diff --git a/man/systemd.netdev.xml b/man/systemd.netdev.xml index e4e7e611e77..255b85f4049 100644 --- a/man/systemd.netdev.xml +++ b/man/systemd.netdev.xml @@ -189,6 +189,9 @@ batadv B.A.T.M.A.N. Advanced is a routing protocol for multi-hop mobile ad-hoc networks which operates on layer 2. + + ipoib + An IP over Infiniband subinterface. @@ -2125,6 +2128,49 @@ + + [IPoIB] Section Options + The [IPoIB] section only applies for netdevs of kind ipoib and accepts the + following keys: + + + + PartitionKey= + + Takes an integer in the range 1…0xffff, except for 0x8000. Defaults to unset, and the + kernel's default is used. + + + + + Mode= + + Takes one of the special values datagram or + connected. Defaults to unset, and the kernel's default is used. + + When datagram, the Infiniband unreliable datagram (UD) transport is + used, and so the interface MTU is equal to the IB L2 MTU minus the IPoIB encapsulation + header (4 bytes). For example, in a typical IB fabric with a 2K MTU, the IPoIB MTU will be + 2048 - 4 = 2044 bytes. + + When connected, the Infiniband reliable connected (RC) transport is + used. Connected mode takes advantage of the connected nature of the IB transport and allows + an MTU up to the maximal IP packet size of 64K, which reduces the number of IP packets needed + for handling large UDP datagrams, TCP segments, etc., and increases the performance for large + messages. + + + + + IgnoreUserspaceMulticastGroups= + + Takes a boolean value. When true, the kernel ignores multicast groups handled by + userspace. Defaults to unset, and the kernel's default is used. 
+ + + + + Examples diff --git a/man/systemd.network.xml b/man/systemd.network.xml index 50367ecdcd7..e8e01f9094c 100644 --- a/man/systemd.network.xml +++ b/man/systemd.network.xml @@ -902,6 +902,7 @@ Table=1234 + IPoIB= IPVLAN= IPVTAP= L2TP= @@ -913,8 +914,8 @@ Table=1234 VXLAN= Xfrm= - The name of an IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN, VXLAN, or - Xfrm to be created on the link. See + The name of an IPoIB, IPVLAN, IPVTAP, L2TP, MACsec, MACVLAN, MACVTAP, tunnel, VLAN, + VXLAN, or Xfrm to be created on the link. See systemd.netdev5. This option may be specified more than once. @@ -3226,6 +3227,15 @@ Token=prefixstable:2002:da8:1:: + + [IPoIB] Section Options + The [IPoIB] section manages the IP over Infiniband and accepts the following keys: + + + + + + [QDisc] Section Options The [QDisc] section manages the traffic control queueing discipline (qdisc). diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c index 874fffe9d93..182c74ed3d8 100644 --- a/src/libsystemd/sd-netlink/netlink-message.c +++ b/src/libsystemd/sd-netlink/netlink-message.c @@ -147,7 +147,7 @@ sd_netlink_message *sd_netlink_message_unref(sd_netlink_message *m) { int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *type) { assert_return(m, -EINVAL); - assert_return(type, -EINVAL); + assert_return(type != 0, -EINVAL); *type = m->hdr->nlmsg_type; @@ -156,7 +156,7 @@ int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *type) { int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags) { assert_return(m, -EINVAL); - assert_return(flags, -EINVAL); + assert_return(flags != 0, -EINVAL); m->hdr->nlmsg_flags = flags; diff --git a/src/libsystemd/sd-netlink/netlink-types-rtnl.c b/src/libsystemd/sd-netlink/netlink-types-rtnl.c index 624422eea27..167bbc5ccfc 100644 --- a/src/libsystemd/sd-netlink/netlink-types-rtnl.c +++ b/src/libsystemd/sd-netlink/netlink-types-rtnl.c @@ -210,7 +210,7 @@ static const 
NLType rtnl_link_info_data_geneve_types[] = { [IFLA_GENEVE_DF] = { .type = NETLINK_TYPE_U8 }, }; -static const NLType rtnl_link_info_data_gre_types[] = { +static const NLType rtnl_link_info_data_gre_types[] = { [IFLA_GRE_LINK] = { .type = NETLINK_TYPE_U32 }, [IFLA_GRE_IFLAGS] = { .type = NETLINK_TYPE_U16 }, [IFLA_GRE_OFLAGS] = { .type = NETLINK_TYPE_U16 }, @@ -237,6 +237,12 @@ static const NLType rtnl_link_info_data_gre_types[] = { [IFLA_GRE_ERSPAN_HWID] = { .type = NETLINK_TYPE_U16 }, }; +static const NLType rtnl_link_info_data_ipoib_types[] = { + [IFLA_IPOIB_PKEY] = { .type = NETLINK_TYPE_U16 }, + [IFLA_IPOIB_MODE] = { .type = NETLINK_TYPE_U16 }, + [IFLA_IPOIB_UMCAST] = { .type = NETLINK_TYPE_U16 }, +}; + /* IFLA_IPTUN_ attributes are used in ipv4/ipip.c, ipv6/ip6_tunnel.c, and ipv6/sit.c. And unfortunately, * IFLA_IPTUN_FLAGS is used with different types, ugh... */ #define DEFINE_IPTUN_TYPES(name, flags_type) \ @@ -410,9 +416,7 @@ static const NLTypeSystemUnionElement rtnl_link_info_data_type_systems[] = { { .name = "ip6gre", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_gre), }, { .name = "ip6gretap", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_gre), }, { .name = "ip6tnl", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_iptun), }, -/* { .name = "ipoib", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_ipoib), }, -*/ { .name = "ipip", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_iptun), }, { .name = "ipvlan", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_ipvlan), }, { .name = "ipvtap", .type_system = TYPE_SYSTEM_FROM_TYPE(rtnl_link_info_data_ipvlan), }, diff --git a/src/network/meson.build b/src/network/meson.build index cfa16a8ecf0..c1cf227ffcb 100644 --- a/src/network/meson.build +++ b/src/network/meson.build @@ -13,6 +13,8 @@ sources = files(''' netdev/dummy.h netdev/ifb.c netdev/ifb.h + netdev/ipoib.c + netdev/ipoib.h netdev/ipvlan.c netdev/ipvlan.h netdev/macvlan.c diff --git 
a/src/network/netdev/ipoib.c b/src/network/netdev/ipoib.c new file mode 100644 index 00000000000..c3f583fc467 --- /dev/null +++ b/src/network/netdev/ipoib.c @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "ipoib.h" +#include "networkd-network.h" +#include "parse-util.h" +#include "string-table.h" + +assert_cc((int) IP_OVER_INFINIBAND_MODE_DATAGRAM == (int) IPOIB_MODE_DATAGRAM); +assert_cc((int) IP_OVER_INFINIBAND_MODE_CONNECTED == (int) IPOIB_MODE_CONNECTED); + +static void netdev_ipoib_init(NetDev *netdev) { + IPoIB *ipoib; + + assert(netdev); + + ipoib = IPOIB(netdev); + + assert(ipoib); + + ipoib->mode = _IP_OVER_INFINIBAND_MODE_INVALID; + ipoib->umcast = -1; +} + +static int netdev_ipoib_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) { + IPoIB *ipoib; + int r; + + assert(netdev); + assert(link); + assert(m); + + ipoib = IPOIB(netdev); + + assert(ipoib); + + if (ipoib->pkey > 0) { + r = sd_netlink_message_append_u16(m, IFLA_IPOIB_PKEY, ipoib->pkey); + if (r < 0) + return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_PKEY attribute: %m"); + } + + if (ipoib->mode >= 0) { + r = sd_netlink_message_append_u16(m, IFLA_IPOIB_MODE, ipoib->mode); + if (r < 0) + return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_MODE attribute: %m"); + } + + if (ipoib->umcast >= 0) { + r = sd_netlink_message_append_u16(m, IFLA_IPOIB_UMCAST, ipoib->umcast); + if (r < 0) + return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPOIB_UMCAST attribute: %m"); + } + + return 0; +} + +int ipoib_set_netlink_message(Link *link, sd_netlink_message *m) { + int r; + + assert(link); + assert(link->network); + assert(m); + + r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK); + if (r < 0) + return log_link_debug_errno(link, r, "Could not set netlink flags: %m"); + + r = sd_netlink_message_open_container(m, IFLA_LINKINFO); + if (r < 0) + return log_link_debug_errno(link, 
r, "Failed to open IFLA_LINKINFO container: %m"); + + r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, link->kind); + if (r < 0) + return log_link_debug_errno(link, r, "Could not open IFLA_INFO_DATA container: %m"); + + if (link->network->ipoib_mode >= 0) { + r = sd_netlink_message_append_u16(m, IFLA_IPOIB_MODE, link->network->ipoib_mode); + if (r < 0) + return log_link_debug_errno(link, r, "Could not append IFLA_IPOIB_MODE attribute: %m"); + } + + if (link->network->ipoib_umcast >= 0) { + r = sd_netlink_message_append_u16(m, IFLA_IPOIB_UMCAST, link->network->ipoib_umcast); + if (r < 0) + return log_link_debug_errno(link, r, "Could not append IFLA_IPOIB_UMCAST attribute: %m"); + } + + r = sd_netlink_message_close_container(m); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to close IFLA_INFO_DATA container: %m"); + + r = sd_netlink_message_close_container(m); + if (r < 0) + return log_link_debug_errno(link, r, "Failed to close IFLA_LINKINFO container: %m"); + + return 0; +} + +static const char * const ipoib_mode_table[_IP_OVER_INFINIBAND_MODE_MAX] = { + [IP_OVER_INFINIBAND_MODE_DATAGRAM] = "datagram", + [IP_OVER_INFINIBAND_MODE_CONNECTED] = "connected", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(ipoib_mode, IPoIBMode); +DEFINE_CONFIG_PARSE_ENUM(config_parse_ipoib_mode, ipoib_mode, IPoIBMode, "Failed to parse IPoIB mode"); + +int config_parse_ipoib_pkey( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint16_t u, *pkey = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *pkey = 0; /* 0 means unset. 
*/ + return 0; + } + + r = safe_atou16(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse IPoIB pkey '%s', ignoring assignment: %m", + rvalue); + return 0; + } + if (u == 0 || u == 0x8000) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "IPoIB pkey cannot be 0 nor 0x8000, ignoring assignment: %s", + rvalue); + return 0; + } + + *pkey = u; + return 0; +} + + +const NetDevVTable ipoib_vtable = { + .object_size = sizeof(IPoIB), + .sections = NETDEV_COMMON_SECTIONS "IPoIB\0", + .init = netdev_ipoib_init, + .fill_message_create = netdev_ipoib_fill_message_create, + .create_type = NETDEV_CREATE_STACKED, + .iftype = ARPHRD_INFINIBAND, + .generate_mac = true, +}; diff --git a/src/network/netdev/ipoib.h b/src/network/netdev/ipoib.h new file mode 100644 index 00000000000..415d3b107c8 --- /dev/null +++ b/src/network/netdev/ipoib.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "conf-parser.h" +#include "netdev.h" + +typedef enum IPoIBMode { + IP_OVER_INFINIBAND_MODE_DATAGRAM, + IP_OVER_INFINIBAND_MODE_CONNECTED, + _IP_OVER_INFINIBAND_MODE_MAX, + _IP_OVER_INFINIBAND_MODE_INVALID = -EINVAL, +} IPoIBMode; + +typedef struct IPoIB { + NetDev meta; + + uint16_t pkey; + IPoIBMode mode; + int umcast; +} IPoIB; + +DEFINE_NETDEV_CAST(IPOIB, IPoIB); +extern const NetDevVTable ipoib_vtable; + +int ipoib_set_netlink_message(Link *link, sd_netlink_message *m); + +CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_pkey); +CONFIG_PARSER_PROTOTYPE(config_parse_ipoib_mode); diff --git a/src/network/netdev/netdev-gperf.gperf b/src/network/netdev/netdev-gperf.gperf index 37a0d9fa5d5..a948ec2c8a0 100644 --- a/src/network/netdev/netdev-gperf.gperf +++ b/src/network/netdev/netdev-gperf.gperf @@ -11,6 +11,7 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") #include "conf-parser.h" #include "fou-tunnel.h" #include "geneve.h" +#include "ipoib.h" #include "ipvlan.h" #include 
"l2tp-tunnel.h" #include "macsec.h" @@ -253,3 +254,6 @@ BatmanAdvanced.GatewayBandwidthUp, config_parse_badadv_bandwidth, BatmanAdvanced.HopPenalty, config_parse_uint8, 0, offsetof(BatmanAdvanced, hop_penalty) BatmanAdvanced.OriginatorIntervalSec, config_parse_sec, 0, offsetof(BatmanAdvanced, originator_interval) BatmanAdvanced.RoutingAlgorithm, config_parse_batadv_routing_algorithm, 0, offsetof(BatmanAdvanced, routing_algorithm) +IPoIB.PartitionKey, config_parse_ipoib_pkey, 0, offsetof(IPoIB, pkey) +IPoIB.Mode, config_parse_ipoib_mode, 0, offsetof(IPoIB, mode) +IPoIB.IgnoreUserspaceMulticastGroups, config_parse_tristate, 0, offsetof(IPoIB, umcast) diff --git a/src/network/netdev/netdev.c b/src/network/netdev/netdev.c index 5d88cbdf5f1..f6732925940 100644 --- a/src/network/netdev/netdev.c +++ b/src/network/netdev/netdev.c @@ -18,6 +18,7 @@ #include "fou-tunnel.h" #include "geneve.h" #include "ifb.h" +#include "ipoib.h" #include "ipvlan.h" #include "l2tp-tunnel.h" #include "list.h" @@ -64,6 +65,7 @@ const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX] = { [NETDEV_KIND_IP6GRETAP] = &ip6gretap_vtable, [NETDEV_KIND_IP6TNL] = &ip6tnl_vtable, [NETDEV_KIND_IPIP] = &ipip_vtable, + [NETDEV_KIND_IPOIB] = &ipoib_vtable, [NETDEV_KIND_IPVLAN] = &ipvlan_vtable, [NETDEV_KIND_IPVTAP] = &ipvtap_vtable, [NETDEV_KIND_L2TP] = &l2tptnl_vtable, @@ -103,6 +105,7 @@ static const char* const netdev_kind_table[_NETDEV_KIND_MAX] = { [NETDEV_KIND_IP6GRETAP] = "ip6gretap", [NETDEV_KIND_IP6TNL] = "ip6tnl", [NETDEV_KIND_IPIP] = "ipip", + [NETDEV_KIND_IPOIB] = "ipoib", [NETDEV_KIND_IPVLAN] = "ipvlan", [NETDEV_KIND_IPVTAP] = "ipvtap", [NETDEV_KIND_L2TP] = "l2tp", @@ -391,17 +394,26 @@ int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *message) { #define HASH_KEY SD_ID128_MAKE(52,e1,45,bd,00,6f,29,96,21,c6,30,6d,83,71,04,48) -int netdev_generate_hw_addr(NetDev *netdev, const char *name, struct hw_addr_data *hw_addr) { +int netdev_generate_hw_addr( + NetDev *netdev, + Link *parent, + 
const char *name, + const struct hw_addr_data *hw_addr, + struct hw_addr_data *ret) { + + struct hw_addr_data a = HW_ADDR_NULL; bool warn_invalid = false; - struct hw_addr_data a; int r; assert(netdev); assert(name); assert(hw_addr); + assert(ret); - if (hw_addr_equal(hw_addr, &HW_ADDR_NONE)) + if (hw_addr_equal(hw_addr, &HW_ADDR_NONE)) { + *ret = HW_ADDR_NULL; return 0; + } if (hw_addr->length == 0) { uint64_t result; @@ -409,42 +421,66 @@ int netdev_generate_hw_addr(NetDev *netdev, const char *name, struct hw_addr_dat /* HardwareAddress= is not specified. */ if (!NETDEV_VTABLE(netdev)->generate_mac) - return 0; + goto finalize; - if (NETDEV_VTABLE(netdev)->iftype != ARPHRD_ETHER) - return 0; + if (!IN_SET(NETDEV_VTABLE(netdev)->iftype, ARPHRD_ETHER, ARPHRD_INFINIBAND)) + goto finalize; r = net_get_unique_predictable_data_from_name(name, &HASH_KEY, &result); if (r < 0) { log_netdev_warning_errno(netdev, r, "Failed to generate persistent MAC address, ignoring: %m"); - return 0; + goto finalize; } a.length = arphrd_to_hw_addr_len(NETDEV_VTABLE(netdev)->iftype); - assert(a.length <= sizeof(result)); - memcpy(a.bytes, &result, a.length); - if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) { - log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL), - "Failed to generate persistent MAC address, ignoring: %m"); - return 0; + switch (NETDEV_VTABLE(netdev)->iftype) { + case ARPHRD_ETHER: + assert(a.length <= sizeof(result)); + memcpy(a.bytes, &result, a.length); + + if (ether_addr_is_null(&a.ether) || ether_addr_is_broadcast(&a.ether)) { + log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL), + "Failed to generate persistent MAC address, ignoring: %m"); + a = HW_ADDR_NULL; + goto finalize; + } + + break; + case ARPHRD_INFINIBAND: + if (result == 0) { + log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL), + "Failed to generate persistent MAC address: %m"); + goto finalize; + } + + assert(a.length >= sizeof(result)); + memzero(a.bytes, 
a.length - sizeof(result)); + memcpy(a.bytes + a.length - sizeof(result), &result, sizeof(result)); + break; + default: + assert_not_reached(); } + } else { a = *hw_addr; warn_invalid = true; } - r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype, NULL, &a); + r = net_verify_hardware_address(name, warn_invalid, NETDEV_VTABLE(netdev)->iftype, + parent ? &parent->hw_addr : NULL, &a); if (r < 0) return r; - *hw_addr = a; +finalize: + *ret = a; return 0; } static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) { _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + struct hw_addr_data hw_addr; int r; assert(netdev); @@ -470,8 +506,13 @@ static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handle if (r < 0) return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m"); - if (netdev->hw_addr.length > 0 && !hw_addr_equal(&netdev->hw_addr, &HW_ADDR_NULL)) { - r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &netdev->hw_addr); + r = netdev_generate_hw_addr(netdev, link, netdev->ifname, &netdev->hw_addr, &hw_addr); + if (r < 0) + return r; + + if (hw_addr.length > 0) { + log_netdev_debug(netdev, "Using MAC address: %s", HW_ADDR_TO_STR(&hw_addr)); + r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &hw_addr); if (r < 0) return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m"); } @@ -789,10 +830,6 @@ int netdev_load_one(Manager *manager, const char *filename) { if (!netdev->filename) return log_oom(); - r = netdev_generate_hw_addr(netdev, netdev->ifname, &netdev->hw_addr); - if (r < 0) - return r; - r = hashmap_ensure_put(&netdev->manager->netdevs, &string_hash_ops, netdev->ifname, netdev); if (r == -ENOMEM) return log_oom(); diff --git a/src/network/netdev/netdev.h b/src/network/netdev/netdev.h index ed8a2e33c51..c7262f550a5 100644 --- a/src/network/netdev/netdev.h +++ b/src/network/netdev/netdev.h @@ -22,6 
+22,7 @@ "-Bridge\0" \ "-FooOverUDP\0" \ "-GENEVE\0" \ + "-IPoIB\0" \ "-IPVLAN\0" \ "-IPVTAP\0" \ "-L2TP\0" \ @@ -60,6 +61,7 @@ typedef enum NetDevKind { NETDEV_KIND_IP6GRETAP, NETDEV_KIND_IP6TNL, NETDEV_KIND_IPIP, + NETDEV_KIND_IPOIB, NETDEV_KIND_IPVLAN, NETDEV_KIND_IPVTAP, NETDEV_KIND_L2TP, @@ -201,7 +203,8 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_unref); bool netdev_is_managed(NetDev *netdev); int netdev_get(Manager *manager, const char *name, NetDev **ret); int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *newlink); -int netdev_generate_hw_addr(NetDev *netdev, const char *name, struct hw_addr_data *hw_addr); +int netdev_generate_hw_addr(NetDev *netdev, Link *link, const char *name, + const struct hw_addr_data *hw_addr, struct hw_addr_data *ret); int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t cb); int request_process_stacked_netdev(Request *req); diff --git a/src/network/netdev/veth.c b/src/network/netdev/veth.c index d7870d7628a..c946e81fc0a 100644 --- a/src/network/netdev/veth.c +++ b/src/network/netdev/veth.c @@ -10,6 +10,7 @@ #include "veth.h" static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) { + struct hw_addr_data hw_addr; Veth *v; int r; @@ -31,8 +32,13 @@ static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlin return log_netdev_error_errno(netdev, r, "Failed to add netlink interface name: %m"); } - if (v->hw_addr_peer.length > 0 && !hw_addr_equal(&v->hw_addr_peer, &HW_ADDR_NULL)) { - r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &v->hw_addr_peer); + r = netdev_generate_hw_addr(netdev, NULL, v->ifname_peer, &v->hw_addr_peer, &hw_addr); + if (r < 0) + return r; + + if (hw_addr.length > 0) { + log_netdev_debug(netdev, "Using MAC address for peer: %s", HW_ADDR_TO_STR(&hw_addr)); + r = netlink_message_append_hw_addr(m, IFLA_ADDRESS, &hw_addr); if (r < 0) return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m"); } 
@@ -52,7 +58,6 @@ static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlin static int netdev_veth_verify(NetDev *netdev, const char *filename) { Veth *v; - int r; assert(netdev); assert(filename); @@ -66,10 +71,6 @@ static int netdev_veth_verify(NetDev *netdev, const char *filename) { "Veth NetDev without peer name configured in %s. Ignoring", filename); - r = netdev_generate_hw_addr(netdev, v->ifname_peer, &v->hw_addr_peer); - if (r < 0) - return r; - return 0; } diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c index 4df3e19220d..266334e78ce 100644 --- a/src/network/networkd-link.c +++ b/src/network/networkd-link.c @@ -1101,6 +1101,10 @@ static int link_configure(Link *link) { if (r < 0) return r; + r = link_request_to_set_ipoib(link); + if (r < 0) + return r; + r = link_request_to_set_flags(link); if (r < 0) return r; diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf index df9721a9bcb..3b8d45c1bb5 100644 --- a/src/network/networkd-network-gperf.gperf +++ b/src/network/networkd-network-gperf.gperf @@ -87,6 +87,7 @@ Network.BatmanAdvanced, config_parse_ifname, Network.Bond, config_parse_ifname, 0, offsetof(Network, bond_name) Network.Bridge, config_parse_ifname, 0, offsetof(Network, bridge_name) Network.VRF, config_parse_ifname, 0, offsetof(Network, vrf_name) +Network.IPoIB, config_parse_stacked_netdev, NETDEV_KIND_IPOIB, offsetof(Network, stacked_netdev_names) Network.IPVLAN, config_parse_stacked_netdev, NETDEV_KIND_IPVLAN, offsetof(Network, stacked_netdev_names) Network.IPVTAP, config_parse_stacked_netdev, NETDEV_KIND_IPVTAP, offsetof(Network, stacked_netdev_names) Network.L2TP, config_parse_stacked_netdev, NETDEV_KIND_L2TP, offsetof(Network, stacked_netdev_names) @@ -381,6 +382,8 @@ CAN.PresumeACK, config_parse_can_control_mode, CAN.FDNonISO, config_parse_can_control_mode, CAN_CTRLMODE_FD_NON_ISO, 0 CAN.ClassicDataLengthCode, config_parse_can_control_mode, 
CAN_CTRLMODE_CC_LEN8_DLC, 0 CAN.Termination, config_parse_can_termination, 0, 0 +IPoIB.Mode, config_parse_ipoib_mode, 0, offsetof(Network, ipoib_mode) +IPoIB.IgnoreUserspaceMulticastGroups, config_parse_tristate, 0, offsetof(Network, ipoib_umcast) QDisc.Parent, config_parse_qdisc_parent, _QDISC_KIND_INVALID, 0 QDisc.Handle, config_parse_qdisc_handle, _QDISC_KIND_INVALID, 0 BFIFO.Parent, config_parse_qdisc_parent, QDISC_KIND_BFIFO, 0 diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c index 443222f6106..6651c6c04cc 100644 --- a/src/network/networkd-network.c +++ b/src/network/networkd-network.c @@ -476,6 +476,9 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi .ipv6_accept_ra_start_dhcp6_client = IPV6_ACCEPT_RA_START_DHCP6_CLIENT_YES, .can_termination = -1, + + .ipoib_mode = _IP_OVER_INFINIBAND_MODE_INVALID, + .ipoib_umcast = -1, }; r = config_parse_many( @@ -852,6 +855,7 @@ int config_parse_stacked_netdev( assert(rvalue); assert(data); assert(IN_SET(kind, + NETDEV_KIND_IPOIB, NETDEV_KIND_IPVLAN, NETDEV_KIND_IPVTAP, NETDEV_KIND_L2TP, diff --git a/src/network/networkd-network.h b/src/network/networkd-network.h index 2fdd4994c4c..9304ceaafbf 100644 --- a/src/network/networkd-network.h +++ b/src/network/networkd-network.h @@ -11,6 +11,7 @@ #include "condition.h" #include "conf-parser.h" #include "hashmap.h" +#include "ipoib.h" #include "net-condition.h" #include "netdev.h" #include "networkd-bridge-vlan.h" @@ -286,6 +287,10 @@ struct Network { uint16_t can_termination; bool can_termination_set; + /* IPoIB support */ + IPoIBMode ipoib_mode; + int ipoib_umcast; + /* sysctl settings */ AddressFamily ip_forward; int ipv4_accept_local; diff --git a/src/network/networkd-setlink.c b/src/network/networkd-setlink.c index e7b39778da6..177c054f463 100644 --- a/src/network/networkd-setlink.c +++ b/src/network/networkd-setlink.c @@ -24,6 +24,7 @@ static const char *const set_link_operation_table[_SET_LINK_OPERATION_MAX] = { 
[SET_LINK_CAN] = "CAN interface configurations", [SET_LINK_FLAGS] = "link flags", [SET_LINK_GROUP] = "interface group", + [SET_LINK_IPOIB] = "IPoIB configurations", [SET_LINK_MAC] = "MAC address", [SET_LINK_MASTER] = "master interface", [SET_LINK_MTU] = "MTU", @@ -153,6 +154,10 @@ static int link_set_group_handler(sd_netlink *rtnl, sd_netlink_message *m, Link return set_link_handler_internal(rtnl, m, link, SET_LINK_GROUP, /* ignore = */ false, NULL); } +static int link_set_ipoib_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) { + return set_link_handler_internal(rtnl, m, link, SET_LINK_IPOIB, /* ignore = */ true, NULL); +} + static int link_set_mac_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) { return set_link_handler_internal(rtnl, m, link, SET_LINK_MAC, /* ignore = */ true, get_link_default_handler); } @@ -236,7 +241,7 @@ static int link_configure( r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_NEWLINK, link->master_ifindex); if (r < 0) return log_link_debug_errno(link, r, "Could not allocate RTM_NEWLINK message: %m"); - } else if (op == SET_LINK_CAN) { + } else if (IN_SET(op, SET_LINK_CAN, SET_LINK_IPOIB)) { r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_NEWLINK, link->ifindex); if (r < 0) return log_link_debug_errno(link, r, "Could not allocate RTM_NEWLINK message: %m"); @@ -468,6 +473,11 @@ static int link_configure( if (r < 0) return log_link_debug_errno(link, r, "Could not append IFLA_ADDRESS attribute: %m"); break; + case SET_LINK_IPOIB: + r = ipoib_set_netlink_message(link, req); + if (r < 0) + return r; + break; case SET_LINK_MASTER: r = sd_netlink_message_append_u32(req, IFLA_MASTER, PTR_TO_UINT32(userdata)); if (r < 0) @@ -508,6 +518,9 @@ static bool link_is_ready_to_call_set_link(Request *req) { int r; assert(req); + assert(req->link); + assert(req->link->manager); + assert(req->link->network); link = req->link; op = PTR_TO_INT(req->set_link_operation_ptr); @@ -556,8 +569,6 @@ static bool 
link_is_ready_to_call_set_link(Request *req) { case SET_LINK_MASTER: { uint32_t m = 0; - assert(link->network); - if (link->network->batadv) { if (!netdev_is_ready(link->network->batadv)) return false; @@ -590,6 +601,15 @@ static bool link_is_ready_to_call_set_link(Request *req) { req->userdata = UINT32_TO_PTR(m); break; } + case SET_LINK_MTU: { + Request req_ipoib = { + .link = link, + .type = REQUEST_TYPE_SET_LINK, + .set_link_operation_ptr = INT_TO_PTR(SET_LINK_IPOIB), + }; + + return !ordered_set_contains(link->manager->request_queue, &req_ipoib); + } default: break; } @@ -800,6 +820,20 @@ int link_request_to_set_mac(Link *link, bool allow_retry) { NULL); } +int link_request_to_set_ipoib(Link *link) { + assert(link); + assert(link->network); + + if (link->iftype != ARPHRD_INFINIBAND) + return 0; + + if (link->network->ipoib_mode < 0 && + link->network->ipoib_umcast < 0) + return 0; + + return link_request_set_link(link, SET_LINK_IPOIB, link_set_ipoib_handler, NULL); +} + int link_request_to_set_master(Link *link) { assert(link); assert(link->network); diff --git a/src/network/networkd-setlink.h b/src/network/networkd-setlink.h index d3e9f2b9d7b..39a85a6871d 100644 --- a/src/network/networkd-setlink.h +++ b/src/network/networkd-setlink.h @@ -14,6 +14,7 @@ typedef enum SetLinkOperation { SET_LINK_CAN, /* Setting CAN interface configs. */ SET_LINK_FLAGS, /* Setting IFF_NOARP or friends. */ SET_LINK_GROUP, /* Setting interface group. */ + SET_LINK_IPOIB, /* Setting IPoIB configs. */ SET_LINK_MAC, /* Setting MAC address. */ SET_LINK_MASTER, /* Setting IFLA_MASTER. */ SET_LINK_MTU, /* Setting MTU. 
*/ @@ -33,6 +34,7 @@ int link_request_to_set_can(Link *link); int link_request_to_set_flags(Link *link); int link_request_to_set_group(Link *link); int link_request_to_set_mac(Link *link, bool allow_retry); +int link_request_to_set_ipoib(Link *link); int link_request_to_set_master(Link *link); int link_request_to_set_mtu(Link *link, uint32_t mtu); diff --git a/test/fuzz/fuzz-netdev-parser/directives.netdev b/test/fuzz/fuzz-netdev-parser/directives.netdev index e34d16af117..f5fa2418fee 100644 --- a/test/fuzz/fuzz-netdev-parser/directives.netdev +++ b/test/fuzz/fuzz-netdev-parser/directives.netdev @@ -241,3 +241,7 @@ GatewayBandwithUp= GatewayBandwidthDown= GatewayBandwidthUp= RoutingAlgorithm= +[IPoIB] +PartitionKey= +Mode= +IgnoreUserspaceMulticastGroups= diff --git a/test/fuzz/fuzz-network-parser/directives.network b/test/fuzz/fuzz-network-parser/directives.network index 5b5a4f8c60e..c1c00262341 100644 --- a/test/fuzz/fuzz-network-parser/directives.network +++ b/test/fuzz/fuzz-network-parser/directives.network @@ -242,6 +242,7 @@ IgnoreCarrierLoss= KeepConfiguration= DHCPv6PrefixDelegation= BatmanAdvanced= +IPoIB= [IPv6Prefix] Prefix= OnLink= @@ -286,6 +287,9 @@ Loopback= OneShot= PresumeACK= ClassicDataLengthCode= +[IPoIB] +Mode= +IgnoreUserspaceMulticastGroups= [Address] DuplicateAddressDetection= AutoJoin=