vxlan: add support for flowlabel inherit

By default, VXLAN encapsulation over IPv6 sets the flow label to 0, with
an option for a fixed value. This commits add the ability to inherit the
flow label from the inner packet, like for other tunnel implementations.
This enables devices using only L3 headers for ECMP to correctly balance
VXLAN-encapsulated IPv6 packets.

```
$ ./ip/ip link add dummy1 type dummy
$ ./ip/ip addr add 2001:db8::2/64 dev dummy1
$ ./ip/ip link set up dev dummy1
$ ./ip/ip link add vxlan1 type vxlan id 100 flowlabel inherit remote 2001:db8::1 local 2001:db8::2
$ ./ip/ip link set up dev vxlan1
$ ./ip/ip addr add 2001:db8:1::2/64 dev vxlan1
$ ./ip/ip link set arp off dev vxlan1
$ ping -q 2001:db8:1::1 &
$ tshark -d udp.port==8472,vxlan -Vpni dummy1 -c1
[...]
Internet Protocol Version 6, Src: 2001:db8::2, Dst: 2001:db8::1
    0110 .... = Version: 6
    .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
    .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb
[...]
Virtual eXtensible Local Area Network
    Flags: 0x0800, VXLAN Network ID (VNI)
    Group Policy ID: 0
    VXLAN Network Identifier (VNI): 100
[...]
Internet Protocol Version 6, Src: 2001:db8:1::2, Dst: 2001:db8:1::1
    0110 .... = Version: 6
    .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
    .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb
```

Signed-off-by: Alce Lafranque <alce@lafranque.net>
Co-developed-by: Vincent Bernat <vincent@bernat.ch>
Signed-off-by: Vincent Bernat <vincent@bernat.ch>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Alce Lafranque 2023-11-14 11:36:57 -06:00 committed by David S. Miller
parent 3bdd9fd29c
commit c6e9dba3be
4 changed files with 58 additions and 17 deletions

View file

@ -2379,7 +2379,17 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
else else
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
key.label = vxlan->cfg.label; switch (vxlan->cfg.label_policy) {
case VXLAN_LABEL_FIXED:
key.label = vxlan->cfg.label;
break;
case VXLAN_LABEL_INHERIT:
key.label = ip_tunnel_get_flowlabel(old_iph, skb);
break;
default:
DEBUG_NET_WARN_ON_ONCE(1);
goto drop;
}
#endif #endif
} else { } else {
if (!info) { if (!info) {
@ -3366,6 +3376,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_DF] = { .type = NLA_U8 }, [IFLA_VXLAN_DF] = { .type = NLA_U8 },
[IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
[IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
[IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
}; };
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@ -3740,6 +3751,12 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
return -EINVAL; return -EINVAL;
} }
if (conf->label_policy && !use_ipv6) {
NL_SET_ERR_MSG(extack,
"Label policy only applies to IPv6 VXLAN devices");
return -EINVAL;
}
if (conf->remote_ifindex) { if (conf->remote_ifindex) {
struct net_device *lowerdev; struct net_device *lowerdev;
@ -4082,6 +4099,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_VXLAN_LABEL]) if (data[IFLA_VXLAN_LABEL])
conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) & conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
IPV6_FLOWLABEL_MASK; IPV6_FLOWLABEL_MASK;
if (data[IFLA_VXLAN_LABEL_POLICY])
conf->label_policy = nla_get_u32(data[IFLA_VXLAN_LABEL_POLICY]);
if (data[IFLA_VXLAN_LEARNING]) { if (data[IFLA_VXLAN_LEARNING]) {
err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING, err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
@ -4398,6 +4417,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LABEL_POLICY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
@ -4471,6 +4491,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) || nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u32(skb, IFLA_VXLAN_LABEL_POLICY, vxlan->cfg.label_policy) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING, nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->cfg.flags & VXLAN_F_LEARN)) || !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY, nla_put_u8(skb, IFLA_VXLAN_PROXY,

View file

@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0; return 0;
} }
static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
const struct sk_buff *skb)
{
__be16 payload_protocol = skb_protocol(skb, true);
if (payload_protocol == htons(ETH_P_IPV6))
return ip6_flowlabel((const struct ipv6hdr *)iph);
else
return 0;
}
static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
const struct sk_buff *skb) const struct sk_buff *skb)
{ {

View file

@ -210,22 +210,23 @@ struct vxlan_rdst {
}; };
struct vxlan_config { struct vxlan_config {
union vxlan_addr remote_ip; union vxlan_addr remote_ip;
union vxlan_addr saddr; union vxlan_addr saddr;
__be32 vni; __be32 vni;
int remote_ifindex; int remote_ifindex;
int mtu; int mtu;
__be16 dst_port; __be16 dst_port;
u16 port_min; u16 port_min;
u16 port_max; u16 port_max;
u8 tos; u8 tos;
u8 ttl; u8 ttl;
__be32 label; __be32 label;
u32 flags; enum ifla_vxlan_label_policy label_policy;
unsigned long age_interval; u32 flags;
unsigned int addrmax; unsigned long age_interval;
bool no_share; unsigned int addrmax;
enum ifla_vxlan_df df; bool no_share;
enum ifla_vxlan_df df;
}; };
enum { enum {

View file

@ -856,6 +856,7 @@ enum {
IFLA_VXLAN_DF, IFLA_VXLAN_DF,
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS, IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX __IFLA_VXLAN_MAX
}; };
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@ -873,6 +874,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1, VXLAN_DF_MAX = __VXLAN_DF_END - 1,
}; };
enum ifla_vxlan_label_policy {
VXLAN_LABEL_FIXED = 0,
VXLAN_LABEL_INHERIT = 1,
__VXLAN_LABEL_END,
VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
};
/* GENEVE section */ /* GENEVE section */
enum { enum {
IFLA_GENEVE_UNSPEC, IFLA_GENEVE_UNSPEC,