Merge branch 'seg6-add-support-for-srv6-end-dt4-dt6-behavior'

Andrea Mayer says:

====================
seg6: add support for SRv6 End.DT4/DT6 behavior

This patchset provides support for the SRv6 End.DT4 and End.DT6 (VRF mode)
behaviors.

The SRv6 End.DT4 behavior is used to implement multi-tenant IPv4 L3 VPNs. It
decapsulates the received packets and performs IPv4 routing lookup in the
routing table of the tenant. The SRv6 End.DT4 Linux implementation leverages a
VRF device in order to force the routing lookup into the associated routing
table.
The SRv6 End.DT4 behavior is defined in the SRv6 Network Programming [1].

The Linux kernel already offers an implementation of the SRv6 End.DT6 behavior
which allows us to set up IPv6 L3 VPNs over SRv6 networks. This new
implementation of DT6 is based on the same VRF infrastructure already exploited
for implementing the SRv6 End.DT4 behavior. The new SRv6 End.DT6 in VRF mode
aims to simplify the construction of IPv6 L3 VPN services in multi-tenant
environments.
Currently, the two SRv6 End.DT6 implementations (legacy and VRF mode)
coexist seamlessly and can be chosen according to the context and the user
preferences.

- Patch 1 is needed to solve a pre-existing issue with tunneled packets
  when a sniffer is attached;

- Patch 2 improves the management of the seg6local attributes used by the
  SRv6 behaviors;

- Patch 3 adds support for optional attributes in SRv6 behaviors;

- Patch 4 introduces two callbacks used for customizing the
  creation/destruction of a SRv6 behavior;

- Patch 5 is the core patch that adds support for the SRv6 End.DT4
  behavior;

- Patch 6 introduces the VRF support for SRv6 End.DT6 behavior;

- Patch 7 adds the selftest for SRv6 End.DT4 behavior;

- Patch 8 adds the selftest for SRv6 End.DT6 (VRF mode) behavior.

Regarding iproute2, the support for the new "vrftable" attribute, required by
both SRv6 End.DT4 and End.DT6 (VRF mode) behaviors, is provided in a different
patchset that will follow shortly.

I would like to thank David Ahern for his support during the development of
this patchset.

[1] https://tools.ietf.org/html/draft-ietf-spring-srv6-network-programming
====================

Link: https://lore.kernel.org/r/20201202130517.4967-1-andrea.mayer@uniroma2.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2020-12-04 13:30:53 -08:00
commit 4be986c824
5 changed files with 1646 additions and 19 deletions

View file

@ -1310,6 +1310,61 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
skb_dst_set(skb, &rt6->dst);
}
/* Build an Ethernet header in front of the current packet data, using the
 * address of @vrf_dev as both source and destination and @proto (host byte
 * order) as the ethertype. The header is pushed, filled in, accounted for in
 * the checksum and then pulled again, so skb->data ends up where it started.
 *
 * Returns 0 on success, -ENOBUFS if the skb head cannot be made large enough.
 */
static int vrf_prepare_mac_header(struct sk_buff *skb,
				  struct net_device *vrf_dev, u16 proto)
{
	struct ethhdr *eth;

	/* in general, we cannot know whether the head of the packet has
	 * enough room for hosting the mac header.
	 */
	if (unlikely(skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev)))) {
		/* no space in the skb head */
		return -ENOBUFS;
	}

	__skb_push(skb, ETH_HLEN);
	eth = (struct ethhdr *)skb->data;

	skb_reset_mac_header(skb);

	/* both the ethernet source and destination addresses are set to the
	 * address of the VRF device.
	 */
	eth->h_proto = htons(proto);
	ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
	ether_addr_copy(eth->h_dest, vrf_dev->dev_addr);

	/* the destination address of the Ethernet frame is the one set on
	 * the VRF interface; hence the packet is meant to be processed
	 * locally.
	 */
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth->h_proto;

	skb_postpush_rcsum(skb, skb->data, ETH_HLEN);

	skb_pull_inline(skb, ETH_HLEN);

	return 0;
}
/* Make sure the packet carries a mac header, so that packet sniffers such as
 * tcpdump can parse it correctly.
 * A mac header that is already set is left untouched and 0 is returned;
 * otherwise a new one is built through vrf_prepare_mac_header() and its
 * return value is propagated.
 */
static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
				       struct net_device *vrf_dev,
				       u16 proto)
{
	return skb_mac_header_was_set(skb) ?
		0 : vrf_prepare_mac_header(skb, vrf_dev, proto);
}
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
struct sk_buff *skb)
{
@ -1336,9 +1391,15 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
skb->skb_iif = vrf_dev->ifindex;
if (!list_empty(&vrf_dev->ptype_all)) {
skb_push(skb, skb->mac_len);
dev_queue_xmit_nit(skb, vrf_dev);
skb_pull(skb, skb->mac_len);
int err;
err = vrf_add_mac_header_if_unset(skb, vrf_dev,
ETH_P_IPV6);
if (likely(!err)) {
skb_push(skb, skb->mac_len);
dev_queue_xmit_nit(skb, vrf_dev);
skb_pull(skb, skb->mac_len);
}
}
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
@ -1381,9 +1442,14 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
vrf_rx_stats(vrf_dev, skb->len);
if (!list_empty(&vrf_dev->ptype_all)) {
skb_push(skb, skb->mac_len);
dev_queue_xmit_nit(skb, vrf_dev);
skb_pull(skb, skb->mac_len);
int err;
err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP);
if (likely(!err)) {
skb_push(skb, skb->mac_len);
dev_queue_xmit_nit(skb, vrf_dev);
skb_pull(skb, skb->mac_len);
}
}
skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);

View file

@ -26,6 +26,7 @@ enum {
SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
SEG6_LOCAL_VRFTABLE,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)

View file

@ -33,11 +33,35 @@
struct seg6_local_lwt;
/* callbacks used for customizing the creation and destruction of a behavior.
 * Both callbacks are optional: callers test the pointer before invoking it.
 */
struct seg6_local_lwtunnel_ops {
/* custom constructor: receives the tunnel state, the opaque route
 * configuration (cfg) and the netlink extended ack used for reporting
 * errors; a negative return value makes the state creation fail.
 */
int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
struct netlink_ext_ack *extack);
/* custom destructor: receives the tunnel state being destroyed */
void (*destroy_state)(struct seg6_local_lwt *slwt);
};
/* Descriptor of a seg6local behavior (action). */
struct seg6_action_desc {
/* behavior identifier (one of the SEG6_LOCAL_ACTION_* values) */
int action;
/* bitmask of the required attributes; each attribute is encoded as
 * (1 << SEG6_LOCAL_<attr>)
 */
unsigned long attrs;
/* The optattrs field is used for specifying all the optional
* attributes supported by a specific behavior.
* It means that if one of these attributes is not provided in the
* netlink message during the behavior creation, no errors will be
* returned to the userspace.
*
* Each attribute can be only of two types (mutually exclusive):
* 1) required or 2) optional.
* Every user MUST obey this rule! If you set an attribute as
* required the same attribute CANNOT be set as optional and vice
* versa.
*/
unsigned long optattrs;
/* packet processing handler of the behavior */
int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
/* amount added to the headroom of the tunnel state at parse time */
int static_headroom;
/* optional custom constructor/destructor of the behavior */
struct seg6_local_lwtunnel_ops slwt_ops;
};
struct bpf_lwt_prog {
@ -45,6 +69,28 @@ struct bpf_lwt_prog {
char *name;
};
/* Operating mode of an SRv6 End.DT* behavior instance. */
enum seg6_end_dt_mode {
/* neither or both of the table/vrftable attributes were provided */
DT_INVALID_MODE = -EINVAL,
/* lookup driven by the "table" attribute */
DT_LEGACY_MODE = 0,
/* lookup driven by the VRF bound to the "vrftable" attribute */
DT_VRF_MODE = 1,
};
/* Per-instance state of the SRv6 End.DT4/DT6 behavior. */
struct seg6_end_dt_info {
/* legacy or VRF mode, decided when the behavior is built */
enum seg6_end_dt_mode mode;
/* network namespace taken from the route configuration at build time */
struct net *net;
/* VRF device associated to the routing table used by the SRv6
* End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
*/
int vrf_ifindex;
/* table id carried by the SEG6_LOCAL_VRFTABLE netlink attribute */
int vrf_table;
/* tunneled packet proto and family (IPv4 or IPv6) */
__be16 proto;
u16 family;
/* size of the inner IPv4/IPv6 header; used as transport header offset */
int hdrlen;
};
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@ -54,9 +100,16 @@ struct seg6_local_lwt {
int iif;
int oif;
struct bpf_lwt_prog bpf;
#ifdef CONFIG_NET_L3_MASTER_DEV
struct seg6_end_dt_info dt_info;
#endif
int headroom;
struct seg6_action_desc *desc;
/* unlike the required attrs, we have to track the optional attributes
* that have been effectively parsed.
*/
unsigned long parsed_optattrs;
};
static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
@ -401,6 +454,248 @@ static int input_action_end_dx4(struct sk_buff *skb,
return -EINVAL;
}
#ifdef CONFIG_NET_L3_MASTER_DEV
/* Return the network namespace stored in the netlink info of @fib6_cfg. */
static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
{
	return fib6_cfg->fc_nlinfo.nl_net;
}
/* Common constructor of the End.DT4/DT6 behaviors working in VRF mode.
 *
 * Looks up the VRF device bound to the routing table previously stored in
 * slwt->dt_info.vrf_table and fills the remaining dt_info fields according
 * to @family (AF_INET or AF_INET6).
 *
 * Returns 0 on success, the negative value returned by the table lookup if
 * no VRF device can be retrieved, or -EINVAL for an unsupported @family.
 */
static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
				   u16 family, struct netlink_ext_ack *extack)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	struct net *net = fib6_config_get_net(cfg);
	int ifindex;

	/* note that vrf_table was already set by parse_nla_vrftable() */
	ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
						    info->vrf_table);
	if (ifindex < 0) {
		switch (ifindex) {
		case -EPERM:
			NL_SET_ERR_MSG(extack,
				       "Strict mode for VRF is disabled");
			break;
		case -ENODEV:
			NL_SET_ERR_MSG(extack,
				       "Table has no associated VRF device");
			break;
		default:
			pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
				 ifindex);
			break;
		}

		return ifindex;
	}

	info->net = net;
	info->vrf_ifindex = ifindex;

	if (family == AF_INET) {
		info->proto = htons(ETH_P_IP);
		info->hdrlen = sizeof(struct iphdr);
	} else if (family == AF_INET6) {
		info->proto = htons(ETH_P_IPV6);
		info->hdrlen = sizeof(struct ipv6hdr);
	} else {
		return -EINVAL;
	}

	info->family = family;
	info->mode = DT_VRF_MODE;

	return 0;
}
/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
* routes the IPv4/IPv6 packet by looking at the configured routing table.
*
* In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
* Routing Header packets) from several interfaces and the outer IPv6
* destination address (DA) is used for retrieving the specific instance of the
* End.DT4/DT6 behavior that should process the packets.
*
* However, the inner IPv4/IPv6 packet is not really bound to any receiving
* interface and thus the End.DT4/DT6 sets the VRF (associated with the
* corresponding routing table) as the *receiving* interface.
* In other words, the End.DT4/DT6 processes a packet as if it has been received
* directly by the VRF (and not by one of its slave devices, if any).
* In this way, the VRF interface is used for routing the IPv4/IPv6 packet
* according to the routing table configured by the End.DT4/DT6 instance.
*
* This design allows you to get some interesting features like:
* 1) the statistics on rx packets;
* 2) the possibility to install a packet sniffer on the receiving interface
* (the VRF one) for looking at the incoming packets;
* 3) the possibility to leverage the netfilter prerouting hook for the inner
* IPv4 packet.
*
* This function returns:
* - the sk_buff* when the VRF rcv handler has processed the packet correctly;
* - NULL when the skb is consumed by the VRF rcv handler;
* - a pointer which encodes a negative error number in case of error.
* Note that in this case, the function takes care of freeing the skb.
*/
static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
				      struct net_device *dev)
{
	/* based on l3mdev_ip_rcv; we are only interested in the master */
	if (unlikely(!(netif_is_l3_master(dev) ||
		       netif_has_l3_rx_handler(dev))))
		goto drop;

	if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
		goto drop;

	/* the decapsulated IPv4/IPv6 packet carries no mac header info.
	 * The mac header is unset so that the VRF device can rebuild it,
	 * in case a sniffer is attached to the device.
	 */
	skb_unset_mac_header(skb);

	skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
	if (!skb) {
		/* the skb buffer was consumed by the handler */
		return NULL;
	}

	/* when a packet is received by a VRF or by one of its slaves, the
	 * master device reference is set into the skb; anything else is
	 * treated as an error.
	 */
	if (likely(skb->dev == dev && skb->skb_iif == dev->ifindex))
		return skb;

drop:
	kfree_skb(skb);
	return ERR_PTR(-EINVAL);
}
/* Retrieve the VRF device recorded in @info.
 *
 * Returns NULL when the stored ifindex is negative or when the packet's
 * device does not belong to the namespace stored in @info; otherwise returns
 * whatever dev_get_by_index_rcu() finds for that ifindex.
 */
static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
					     struct seg6_end_dt_info *info)
{
	struct net *net = info->net;
	int ifindex = info->vrf_ifindex;

	if (unlikely(ifindex < 0))
		return NULL;

	if (unlikely(!net_eq(dev_net(skb->dev), net)))
		return NULL;

	return dev_get_by_index_rcu(net, ifindex);
}
/* Prepare the decapsulated packet and hand it over to the VRF device
 * recorded in the behavior state.
 *
 * Returns what end_dt_vrf_rcv() returns; if the VRF device cannot be
 * retrieved, the skb is freed and ERR_PTR(-EINVAL) is returned.
 */
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
				       struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	struct net_device *vrf_dev = end_dt_get_vrf_rcu(skb, info);

	if (unlikely(!vrf_dev)) {
		kfree_skb(skb);
		return ERR_PTR(-EINVAL);
	}

	skb->protocol = info->proto;

	skb_dst_drop(skb);

	skb_set_transport_header(skb, info->hdrlen);

	return end_dt_vrf_rcv(skb, info->family, vrf_dev);
}
/* Input handler of the SRv6 End.DT4 behavior.
 *
 * Decapsulates the packet (the inner protocol must be IPPROTO_IPIP), lets the
 * VRF device receive the inner packet and then routes it.
 *
 * Returns 0 when the VRF handler consumed the packet, a negative error code
 * on failure (the skb is freed), or the value returned by dst_input().
 */
static int input_action_end_dt4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct iphdr *iph;

	if (!decap_and_validate(skb, IPPROTO_IPIP) ||
	    !pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	skb = end_dt_vrf_core(skb, slwt);
	if (IS_ERR(skb)) {
		/* the skb has already been freed by end_dt_vrf_core() */
		return PTR_ERR(skb);
	}

	if (!skb) {
		/* packet has been processed and consumed by the VRF */
		return 0;
	}

	iph = ip_hdr(skb);

	if (unlikely(ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev)))
		goto drop;

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
/* build_state callback of the End.DT4 behavior: delegates to the common VRF
 * constructor with the IPv4 family and returns its result.
 */
static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
struct netlink_ext_ack *extack)
{
return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
}
/* Work out the End.DT6 operating mode from the optional attributes that have
 * been parsed: exactly one of "table" and "vrftable" must be present.
 */
static enum
seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
{
	unsigned long optattrs = slwt->parsed_optattrs;
	bool has_table = !!(optattrs & (1 << SEG6_LOCAL_TABLE));
	bool has_vrftable = !!(optattrs & (1 << SEG6_LOCAL_VRFTABLE));

	if (has_table == has_vrftable) {
		/* both are absent or present: invalid DT6 mode */
		return DT_INVALID_MODE;
	}

	if (has_table)
		return DT_LEGACY_MODE;

	return DT_VRF_MODE;
}
/* Return the operating mode stored in the End.DT* state of @slwt. */
static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
{
	return slwt->dt_info.mode;
}
/* build_state callback of the End.DT6 behavior.
 *
 * In legacy mode only the mode is recorded; in VRF mode the common VRF
 * constructor is invoked with the IPv6 family. Any other mode is rejected
 * with -EINVAL and an extack message.
 */
static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
			      struct netlink_ext_ack *extack)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);

	if (mode == DT_LEGACY_MODE) {
		info->mode = DT_LEGACY_MODE;
		return 0;
	}

	if (mode == DT_VRF_MODE)
		return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);

	NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
	return -EINVAL;
}
#endif
static int input_action_end_dt6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
@ -410,6 +705,28 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
#ifdef CONFIG_NET_L3_MASTER_DEV
if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
goto legacy_mode;
/* DT6_VRF_MODE */
skb = end_dt_vrf_core(skb, slwt);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
if (IS_ERR(skb))
return PTR_ERR(skb);
/* note: this time we do not need to specify the table because the VRF
* takes care of selecting the correct table.
*/
seg6_lookup_any_nexthop(skb, NULL, 0, true);
return dst_input(skb);
legacy_mode:
#endif
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
@ -589,9 +906,28 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_NH4),
.input = input_action_end_dx4,
},
{
.action = SEG6_LOCAL_ACTION_END_DT4,
.attrs = (1 << SEG6_LOCAL_VRFTABLE),
#ifdef CONFIG_NET_L3_MASTER_DEV
.input = input_action_end_dt4,
.slwt_ops = {
.build_state = seg6_end_dt4_build,
},
#endif
},
{
.action = SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
.attrs = 0,
.optattrs = (1 << SEG6_LOCAL_TABLE) |
(1 << SEG6_LOCAL_VRFTABLE),
.slwt_ops = {
.build_state = seg6_end_dt6_build,
},
#else
.attrs = (1 << SEG6_LOCAL_TABLE),
#endif
.input = input_action_end_dt6,
},
{
@ -649,6 +985,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
.len = sizeof(struct in_addr) },
[SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
@ -710,6 +1047,11 @@ static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return memcmp(a->srh, b->srh, len);
}
/* destroy() callback of the SEG6_LOCAL_SRH attribute: frees slwt->srh */
static void destroy_attr_srh(struct seg6_local_lwt *slwt)
{
kfree(slwt->srh);
}
static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
@ -733,6 +1075,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
/* Return the End.DT* info embedded in @slwt when CONFIG_NET_L3_MASTER_DEV is
 * enabled; otherwise return ERR_PTR(-EOPNOTSUPP). Callers must check the
 * result with IS_ERR() before dereferencing it.
 */
static struct
seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
return &slwt->dt_info;
#else
return ERR_PTR(-EOPNOTSUPP);
#endif
}
/* parse() callback of the SEG6_LOCAL_VRFTABLE attribute: stores the table id
 * carried by the netlink attribute into the End.DT* info.
 *
 * Returns 0 on success or the error encoded by seg6_possible_end_dt_info().
 */
static int parse_nla_vrftable(struct nlattr **attrs,
			      struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *dt = seg6_possible_end_dt_info(slwt);

	if (!IS_ERR(dt)) {
		dt->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
		return 0;
	}

	return PTR_ERR(dt);
}
/* put() callback of the SEG6_LOCAL_VRFTABLE attribute: dumps the stored table
 * id into @skb.
 *
 * Returns 0 on success, -EMSGSIZE if the attribute cannot be added, or the
 * error encoded by seg6_possible_end_dt_info().
 */
static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *dt = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(dt))
		return PTR_ERR(dt);

	return nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, dt->vrf_table) ?
		-EMSGSIZE : 0;
}
/* cmp() callback of the SEG6_LOCAL_VRFTABLE attribute.
 *
 * Returns 0 when both tunnel states refer to the same VRF table, a non-zero
 * value otherwise.
 */
static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
	struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);

	/* seg6_possible_end_dt_info() hands back an ERR_PTR() when
	 * CONFIG_NET_L3_MASTER_DEV is not set; such a pointer must never be
	 * dereferenced. Like the parse()/put() callbacks, check it first and
	 * conservatively report the two states as different.
	 */
	if (IS_ERR(info_a) || IS_ERR(info_b))
		return 1;

	return info_a->vrf_table != info_b->vrf_table;
}
static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
@ -901,16 +1290,30 @@ static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return strcmp(a->bpf.name, b->bpf.name);
}
static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
{
kfree(slwt->bpf.name);
if (slwt->bpf.prog)
bpf_prog_put(slwt->bpf.prog);
}
/* Per-attribute operations; one entry per SEG6_LOCAL_* attribute ID. */
struct seg6_action_param {
/* read the attribute from the netlink message into the tunnel state;
 * a negative return value is treated as a parse error.
 */
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
/* dump the attribute from the tunnel state into a netlink message */
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
/* compare the attribute of two tunnel states; non-zero means different */
int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
/* optional destroy() callback useful for releasing resources which
* have been previously acquired in the corresponding parse()
* function.
*/
void (*destroy)(struct seg6_local_lwt *slwt);
};
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
.put = put_nla_srh,
.cmp = cmp_nla_srh },
.cmp = cmp_nla_srh,
.destroy = destroy_attr_srh },
[SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
.put = put_nla_table,
@ -934,14 +1337,130 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
.put = put_nla_bpf,
.cmp = cmp_nla_bpf },
.cmp = cmp_nla_bpf,
.destroy = destroy_attr_bpf },
[SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable,
.put = put_nla_vrftable,
.cmp = cmp_nla_vrftable },
};
/* call the destroy() callback (if available) for each set attribute in
 * @parsed_attrs, starting from the first attribute up to the @max_parsed
 * (excluded) attribute.
 *
 * @parsed_attrs: bitmask of attributes; attribute ID is encoded as 1UL << ID
 * @max_parsed:   first attribute ID that must not be considered
 * @slwt:         tunnel state owning the resources to be released
 */
static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
			    struct seg6_local_lwt *slwt)
{
	struct seg6_action_param *param;
	int i;

	/* Every required seg6local attribute is identified by an ID which is
	 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
	 *
	 * We scan the 'parsed_attrs' bitmask, starting from the first attribute
	 * up to the @max_parsed (excluded) attribute.
	 * For each set attribute, we retrieve the corresponding destroy()
	 * callback. If the callback is not available, then we skip to the next
	 * attribute; otherwise, we call the destroy() callback.
	 */
	for (i = 0; i < max_parsed; ++i) {
		/* the bitmask is an unsigned long: build the flag with the
		 * same type instead of relying on an int-typed shift.
		 */
		if (!(parsed_attrs & (1UL << i)))
			continue;

		param = &seg6_action_params[i];

		if (param->destroy)
			param->destroy(slwt);
	}
}
/* Release every resource that may have been acquired while parsing the
 * attributes of @slwt: both the required attributes of its behavior and the
 * optional ones that were actually parsed are taken into account.
 */
static void destroy_attrs(struct seg6_local_lwt *slwt)
{
	__destroy_attrs(slwt->desc->attrs | slwt->parsed_optattrs,
			SEG6_LOCAL_MAX + 1, slwt);
}
/* Parse the optional attributes supported by the behavior bound to @slwt.
 *
 * Only attributes that are both declared optional by the behavior and
 * provided in @attrs are parsed. On success the set of parsed optional
 * attributes is stored in slwt->parsed_optattrs and 0 is returned.
 * On failure the attributes parsed so far are destroyed and the negative
 * error returned by the failing parse() callback is propagated.
 */
static int parse_nla_optional_attrs(struct nlattr **attrs,
				    struct seg6_local_lwt *slwt)
{
	struct seg6_action_desc *desc = slwt->desc;
	unsigned long parsed_optattrs = 0;
	struct seg6_action_param *param;
	int err, i;

	for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
		/* the bitmasks are unsigned long: build the flag with the
		 * same type instead of relying on an int-typed shift.
		 */
		if (!(desc->optattrs & (1UL << i)) || !attrs[i])
			continue;

		/* once here, the i-th attribute is provided by the
		 * userspace AND it is identified optional as well.
		 */
		param = &seg6_action_params[i];

		err = param->parse(attrs, slwt);
		if (err < 0)
			goto parse_optattrs_err;

		/* current attribute has been correctly parsed */
		parsed_optattrs |= (1UL << i);
	}

	/* store in the tunnel state all the optional attributes successfully
	 * parsed.
	 */
	slwt->parsed_optattrs = parsed_optattrs;

	return 0;

parse_optattrs_err:
	/* release what has been acquired by the attributes parsed before the
	 * failing one (the i-th attribute is excluded).
	 */
	__destroy_attrs(parsed_optattrs, i, slwt);

	return err;
}
/* Run the custom constructor of the behavior, if it provides one.
 *
 * Returns 0 when the behavior has no build_state() callback; otherwise the
 * value returned by the callback.
 */
static int
seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
				struct netlink_ext_ack *extack)
{
	struct seg6_local_lwtunnel_ops *ops = &slwt->desc->slwt_ops;

	return ops->build_state ? ops->build_state(slwt, cfg, extack) : 0;
}
/* Run the custom destructor of the behavior, if it provides one; nothing is
 * done when the behavior has no destroy_state() callback.
 */
static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
{
	struct seg6_local_lwtunnel_ops *ops = &slwt->desc->slwt_ops;

	if (ops->destroy_state)
		ops->destroy_state(slwt);
}
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct seg6_action_param *param;
struct seg6_action_desc *desc;
unsigned long invalid_attrs;
int i, err;
desc = __get_action_desc(slwt->action);
@ -954,6 +1473,26 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
slwt->desc = desc;
slwt->headroom += desc->static_headroom;
/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
* disjoint allows us to release the resources acquired by optional
* attributes and by required attributes independently from each other
* without any interference.
* In other words, we are sure that we do not release any of the acquired
* resources twice.
*
* Note that if an attribute is configured both as required and as
* optional, it means that the user has messed something up in the
* seg6_action_table. Therefore, this check is required for SRv6
* behaviors to work properly.
*/
invalid_attrs = desc->attrs & desc->optattrs;
if (invalid_attrs) {
WARN_ONCE(1,
"An attribute cannot be both required AND optional");
return -EINVAL;
}
/* parse the required attributes */
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
if (desc->attrs & (1 << i)) {
if (!attrs[i])
@ -963,11 +1502,24 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
err = param->parse(attrs, slwt);
if (err < 0)
return err;
goto parse_attrs_err;
}
}
/* parse the optional attributes, if any */
err = parse_nla_optional_attrs(attrs, slwt);
if (err < 0)
goto parse_attrs_err;
return 0;
parse_attrs_err:
/* release any resource that may have been acquired during the i-1
* parse() operations.
*/
__destroy_attrs(desc->attrs, i, slwt);
return err;
}
static int seg6_local_build_state(struct net *net, struct nlattr *nla,
@ -1003,6 +1555,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
goto out_free;
err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
if (err < 0)
goto out_destroy_attrs;
newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
newts->headroom = slwt->headroom;
@ -1011,8 +1567,9 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
return 0;
out_destroy_attrs:
destroy_attrs(slwt);
out_free:
kfree(slwt->srh);
kfree(newts);
return err;
}
@ -1021,12 +1578,9 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
kfree(slwt->srh);
seg6_local_lwtunnel_destroy_state(slwt);
if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
kfree(slwt->bpf.name);
bpf_prog_put(slwt->bpf.prog);
}
destroy_attrs(slwt);
return;
}
@ -1036,13 +1590,16 @@ static int seg6_local_fill_encap(struct sk_buff *skb,
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
struct seg6_action_param *param;
unsigned long attrs;
int i, err;
if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
return -EMSGSIZE;
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
if (slwt->desc->attrs & (1 << i)) {
if (attrs & (1 << i)) {
param = &seg6_action_params[i];
err = param->put(skb, slwt);
if (err < 0)
@ -1061,7 +1618,7 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
nlsize = nla_total_size(4); /* action */
attrs = slwt->desc->attrs;
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
if (attrs & (1 << SEG6_LOCAL_SRH))
nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
@ -1086,6 +1643,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
nlsize += nla_total_size(4);
return nlsize;
}
@ -1094,6 +1654,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
{
struct seg6_local_lwt *slwt_a, *slwt_b;
struct seg6_action_param *param;
unsigned long attrs_a, attrs_b;
int i;
slwt_a = seg6_local_lwtunnel(a);
@ -1102,11 +1663,14 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
if (slwt_a->action != slwt_b->action)
return 1;
if (slwt_a->desc->attrs != slwt_b->desc->attrs)
attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
if (attrs_a != attrs_b)
return 1;
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
if (slwt_a->desc->attrs & (1 << i)) {
if (attrs_a & (1 << i)) {
param = &seg6_action_params[i];
if (param->cmp(slwt_a, slwt_b))
return 1;

View file

@ -0,0 +1,494 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# author: Andrea Mayer <andrea.mayer@uniroma2.it>
# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
# implementing IPv4 L3 VPN use cases.
#
# Hereafter a network diagram is shown, where two different tenants (named 100
# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
# other across an IPv6 network.
#
# Only hosts belonging to the same tenant (and to the same VPN) can communicate
# with each other. Instead, the communication among hosts of different tenants
# is forbidden.
# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
# versa.
#
# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
# b) SRv6 End.DT4 behavior and c) VRF.
#
# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
# example where, within the same domain of tenant 100, the host hs-t100-1 pings
# the host hs-t100-2.
#
# First of all, L2 reachability of the host hs-t100-2 is taken into account by
# the router rt-1 which acts as an ARP proxy.
#
# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
# router rt-1 receives the packet on the internal veth-t100 interface. Such
# interface is enslaved to the VRF vrf-100 whose associated table contains the
# SRv6 Encap route for encapsulating any IPv4 packet in an IPv6 plus the Segment
# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
# network up to the router rt-2 that receives it on veth0 interface.
#
# The rt-2 router uses the 'localsid' routing table to process incoming
# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and
# performs the lookup on the vrf-100 table using the destination address of
# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host
# hs-t100-2 through the veth-t100 interface.
#
# The ping response follows the same processing but this time the roles of rt-1
# and rt-2 are swapped.
#
# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
# connect with each other.
#
#
# +-------------------+                                   +-------------------+
# |                   |                                   |                   |
# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
# |                   |                                   |                   |
# |  +-------------+  |                                   |  +-------------+  |
# |  |    veth0    |  |                                   |  |    veth0    |  |
# |  | 10.0.0.1/24 |  |                                   |  | 10.0.0.2/24 |  |
# |  +-------------+  |                                   |  +-------------+  |
# |         .         |                                   |         .         |
# +-------------------+                                   +-------------------+
#           .                                                       .
#           .                                                       .
#           .                                                       .
# +-----------------------------------+   +-----------------------------------+
# |         .                         |   |                         .         |
# | +---------------+                 |   |                 +---------------+ |
# | |   veth-t100   |                 |   |                 |   veth-t100   | |
# | | 10.0.0.254/24 |    +----------+ |   | +----------+    | 10.0.0.254/24 | |
# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------+ |
# |         |            |   table  | |   | |   table  |            |         |
# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
# |    | vrf-100 |                    |   |                    | vrf-100 |    |
# |    +---------+     +------------+ |   | +------------+     +---------+    |
# |                    |   veth0    | |   | |   veth0    |                    |
# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
# |    +---------+     +------------+ |   | +------------+     +---------+    |
# |    | vrf-200 |                    |   |                    | vrf-200 |    |
# |    +----+----+                    |   |                    +----+----+    |
# |         |                         |   |                         |         |
# | +-------+-------+                 |   |                 +-------+-------+ |
# | |   veth-t200   |                 |   |                 |   veth-t200   | |
# | | 10.0.0.254/24 |                 |   |                 | 10.0.0.254/24 | |
# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------+ |
# |         .                         |   |                         .         |
# +-----------------------------------+   +-----------------------------------+
#           .                                                       .
#           .                                                       .
#           .                                                       .
#           .                                                       .
# +-------------------+                                   +-------------------+
# |         .         |                                   |         .         |
# |  +-------------+  |                                   |  +-------------+  |
# |  |    veth0    |  |                                   |  |    veth0    |  |
# |  | 10.0.0.3/24 |  |                                   |  | 10.0.0.4/24 |  |
# |  +-------------+  |                                   |  +-------------+  |
# |                   |                                   |                   |
# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
# |                   |                                   |                   |
# +-------------------+                                   +-------------------+
#
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~
# | Network configuration |
# ~~~~~~~~~~~~~~~~~~~~~~~~~
#
# rt-1: localsid table (table 90)
# +-------------------------------------------------+
# |SID              |Action                         |
# +-------------------------------------------------+
# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100|
# +-------------------------------------------------+
# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200|
# +-------------------------------------------------+
#
# rt-1: VRF tenant 100 (table 100)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |10.0.0.2   |apply seg6 encap segs fc00:12:100::6004|
# +---------------------------------------------------+
# |10.0.0.0/24|forward to dev veth-t100               |
# +---------------------------------------------------+
#
# rt-1: VRF tenant 200 (table 200)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |10.0.0.4   |apply seg6 encap segs fc00:12:200::6004|
# +---------------------------------------------------+
# |10.0.0.0/24|forward to dev veth-t200               |
# +---------------------------------------------------+
#
#
# rt-2: localsid table (table 90)
# +-------------------------------------------------+
# |SID              |Action                         |
# +-------------------------------------------------+
# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100|
# +-------------------------------------------------+
# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200|
# +-------------------------------------------------+
#
# rt-2: VRF tenant 100 (table 100)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |10.0.0.1   |apply seg6 encap segs fc00:21:100::6004|
# +---------------------------------------------------+
# |10.0.0.0/24|forward to dev veth-t100               |
# +---------------------------------------------------+
#
# rt-2: VRF tenant 200 (table 200)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |10.0.0.3   |apply seg6 encap segs fc00:21:200::6004|
# +---------------------------------------------------+
# |10.0.0.0/24|forward to dev veth-t200               |
# +---------------------------------------------------+
#
# Routing table that collects the SRv6 End.DT4 (localsid) routes; the
# fc00::/16 policy rule added by setup_vpn_config points to it.
readonly LOCALSID_TABLE_ID=90

# Address prefixes: IPv6 network between the routers, IPv4 network of the
# hosts and common locator of every VPN SID.
readonly IPv6_RT_NETWORK=fd00
readonly IPv4_HS_NETWORK=10.0.0
readonly VPN_LOCATOR_SERVICE=fc00

# Value passed to "ping -W" for the host-to-host checks.
PING_TIMEOUT_SEC=4

# Overall exit status (log_test sets it to 1 on a failure) and the pass/fail
# counters. The counters are zeroed explicitly so that the final summary does
# not depend on unset variables or on values inherited from the environment.
ret=0
nsuccess=0
nfail=0

# Set PAUSE_ON_FAIL=yes in the environment to stop after each failed test.
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
# log_test RC EXPECTED MSG
#
# Record the outcome of one test case, which passes when the numeric status
# RC equals EXPECTED. Updates the global nsuccess/nfail counters, sets the
# global ret to 1 on a failure and prints a one-line report for MSG.
# With PAUSE_ON_FAIL=yes a failure waits for a line on stdin; "q" makes the
# script exit with status 1.
log_test()
{
	local actual=$1
	local wanted=$2
	local descr="$3"

	if [ ${actual} -eq ${wanted} ]; then
		nsuccess=$((nsuccess+1))
		printf "\n TEST: %-60s [ OK ]\n" "${descr}"
		return
	fi

	ret=1
	nfail=$((nfail+1))
	printf "\n TEST: %-60s [FAIL]\n" "${descr}"

	if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
		echo
		echo "hit enter to continue, 'q' to quit"
		read a
		[ "$a" = "q" ] && exit 1
	fi
}
# Print the pass/fail summary taken from the global nsuccess/nfail counters.
# Nothing is printed when the variable TESTS is set to "none".
print_log_test_results()
{
	if [ "$TESTS" = "none" ]; then
		return 0
	fi

	printf "\nTests passed: %3d\n" ${nsuccess}
	printf "Tests failed: %3d\n" ${nfail}
}
# log_section TITLE...
#
# Print a banner that introduces a group of tests: an empty line followed by
# the title framed by two rules of '#' characters.
log_section()
{
	local rule="################################################################################"

	printf '\n%s\n' "${rule}"
	printf '%s\n' "TEST SECTION: $*"
	printf '%s\n' "${rule}"
}
# Remove what a (possibly interrupted) run leaves behind: the veth pair that
# links the two routers and the rt-*/hs-* network namespaces.
# Every step is best effort, so the function can also run on a clean system.
cleanup()
{
	local ns

	ip link del veth-rt-1 2>/dev/null || true
	ip link del veth-rt-2 2>/dev/null || true

	# destroy routers rt-* and hosts hs-*
	#
	# "ip netns show" may append "(id: N)" to a name, so only the first
	# field of each line is used. The match is anchored on the "rt-" and
	# "hs-" prefixes: an unanchored 'rt-*|hs-*' regex would also select
	# every namespace whose name merely contains "rt" or "hs".
	for ns in $(ip netns show | awk '$1 ~ /^(rt|hs)-/ { print $1 }'); do
		ip netns del ${ns} || true
	done
}
# setup_rt_networking RT
#
# Create the network namespace rt-RT, move the veth-rt-RT end of the core
# link into it as veth0 with address ${IPv6_RT_NETWORK}::RT/64, and enable
# IPv4 and IPv6 forwarding inside the namespace.
setup_rt_networking()
{
	local id=$1
	local router=rt-${id}
	local core_if=veth-rt-${id}
	local dev
	local knob

	ip netns add ${router}

	# hand this router its end of the core veth pair and rename it
	ip link set ${core_if} netns ${router}
	ip -netns ${router} link set ${core_if} name veth0

	ip -netns ${router} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0
	for dev in veth0 lo; do
		ip -netns ${router} link set ${dev} up
	done

	for knob in net.ipv4.ip_forward net.ipv6.conf.all.forwarding; do
		ip netns exec ${router} sysctl -wq ${knob}=1
	done
}
# setup_hs HS RT TID
#
# Create the host namespace hs-tTID-HS, connect it to router rt-RT through a
# veth pair and enslave the router end of the pair (veth-tTID) to the VRF
# vrf-TID, which is bound to routing table TID.
setup_hs()
{
	local host_id=$1
	local router_id=$2
	local tenant=$3
	local host_ns=hs-t${tenant}-${host_id}
	local router_ns=rt-${router_id}
	local access_if=veth-t${tenant}
	local vrf_if=vrf-${tenant}
	local dev
	local scope

	# host side: namespace, veth pair (peer handed to the router), address
	ip netns add ${host_ns}
	ip -netns ${host_ns} link add veth0 type veth peer name ${access_if}
	ip -netns ${host_ns} link set ${access_if} netns ${router_ns}
	ip -netns ${host_ns} addr add ${IPv4_HS_NETWORK}.${host_id}/24 dev veth0
	for dev in veth0 lo; do
		ip -netns ${host_ns} link set ${dev} up
	done

	# router side: the VRF of the tenant
	ip -netns ${router_ns} link add ${vrf_if} type vrf table ${tenant}
	ip -netns ${router_ns} link set ${vrf_if} up

	# enslave the access interface to the VRF and assign it the .254
	# address of the host network
	ip -netns ${router_ns} link set ${access_if} master ${vrf_if}
	ip -netns ${router_ns} addr add ${IPv4_HS_NETWORK}.254/24 dev ${access_if}
	ip -netns ${router_ns} link set ${access_if} up

	ip netns exec ${router_ns} sysctl -wq net.ipv4.conf.${access_if}.proxy_arp=1

	# disable the rp_filter otherwise the kernel gets confused about how
	# to route decap ipv4 packets.
	for scope in all ${access_if}; do
		ip netns exec ${router_ns} sysctl -wq net.ipv4.conf.${scope}.rp_filter=0
	done

	ip netns exec ${router_ns} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
# setup_vpn_config HSSRC RTSRC HSDST RTDST TID
#
# Configure one direction of the IPv4 L3 VPN of tenant TID, i.e. the traffic
# sent to host HSDST (attached to router rt-RTDST) that enters the VPN at
# router rt-RTSRC. The SID of this direction is
# ${VPN_LOCATOR_SERVICE}:<HSSRC><HSDST>:<TID>::6004.
setup_vpn_config()
{
	local hssrc=$1
	local rtsrc=$2
	local hsdst=$3
	local rtdst=$4
	local tid=$5
	local rtsrc_name=rt-${rtsrc}
	local rtdst_name=rt-${rtdst}
	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
	local sid_rule="to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}"

	# ingress router: encap route for the packets destined to hsdst, plus
	# the route that carries the SID towards the egress router over the
	# core network.
	ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
		encap seg6 mode encap segs ${vpn_sid} dev veth0
	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
		via ${IPv6_RT_NETWORK}::${rtdst} dev veth0

	# egress router: decap route (SRv6 End.DT4) that hands the inner IPv4
	# packet to the lookup in the table of the tenant.
	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
		encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid}

	# all sids for VPNs start with a common locator which is fc00::/16.
	# Routes for handling the SRv6 End.DT4 behavior instances are grouped
	# together in the 'localsid' table.
	#
	# NOTE: added only once
	if ! ip -netns ${rtdst_name} -6 rule show | grep -q "${sid_rule}"; then
		ip -netns ${rtdst_name} -6 rule add \
			to ${VPN_LOCATOR_SERVICE}::/16 \
			lookup ${LOCALSID_TABLE_ID} prio 999
	fi
}
# Build the whole test topology: the veth pair between the routers, the two
# routers, the four hosts and the two IPv4 L3 VPNs (one for each tenant).
setup()
{
	local rt
	local spec

	ip link add veth-rt-1 type veth peer name veth-rt-2

	# routers rt-1 and rt-2
	for rt in 1 2; do
		setup_rt_networking ${rt}
	done

	# hosts, given as "host router tenant":
	# - tenant 100: hs-1 behind rt-1 and hs-2 behind rt-2;
	# - tenant 200: hs-3 behind rt-1 and hs-4 behind rt-2.
	for spec in "1 1 100" "2 2 100" "3 1 200" "4 2 200"; do
		setup_hs ${spec}
	done

	# VPNs, given as "src_host src_router dst_host dst_router tenant":
	# both directions between hs-t100-1 and hs-t100-2, then both
	# directions between hs-t200-3 and hs-t200-4.
	for spec in "1 1 2 2 100" "2 2 1 1 100" "3 1 4 2 200" "4 2 3 1 200"; do
		setup_vpn_config ${spec}
	done
}
# check_rt_connectivity RTSRC RTDST
#
# Send one ping (1 second timeout) from router rt-RTSRC to the core address
# of router RTDST. All output is discarded; the exit status is the one of
# the ping command line.
check_rt_connectivity()
{
	local src=$1
	local dst=$2
	local target=${IPv6_RT_NETWORK}::${dst}

	ip netns exec rt-${src} ping -c 1 -W 1 ${target} &>/dev/null
}
# check_and_log_rt_connectivity RTSRC RTDST
#
# Run the router-to-router ping and log it as a test that must succeed.
check_and_log_rt_connectivity()
{
	local src=$1
	local dst=$2
	local rc

	check_rt_connectivity ${src} ${dst}
	rc=$?

	log_test ${rc} 0 "Routers connectivity: rt-${src} -> rt-${dst}"
}
# check_hs_connectivity HSSRC HSDST TID
#
# Send one ping (timeout PING_TIMEOUT_SEC) from host hs-tTID-HSSRC to the
# address ${IPv4_HS_NETWORK}.HSDST. All output is discarded; the exit status
# is the one of the ping command line.
check_hs_connectivity()
{
	local src=$1
	local dst=$2
	local tenant=$3
	local src_ns=hs-t${tenant}-${src}
	local target=${IPv4_HS_NETWORK}.${dst}

	ip netns exec ${src_ns} ping -c 1 -W ${PING_TIMEOUT_SEC} ${target} &>/dev/null
}
# check_and_log_hs_connectivity HSSRC HSDST TID
#
# Ping host HSDST from host HSSRC inside tenant TID and log it as a test
# that must succeed.
check_and_log_hs_connectivity()
{
	local src=$1
	local dst=$2
	local tenant=$3
	local rc

	check_hs_connectivity ${src} ${dst} ${tenant}
	rc=$?

	log_test ${rc} 0 "Hosts connectivity: hs-t${tenant}-${src} -> hs-t${tenant}-${dst} (tenant ${tenant})"
}
# check_and_log_hs_isolation HSSRC TIDSRC HSDST TIDDST
#
# Ping the address of host HSDST from host hs-tTIDSRC-HSSRC and log it as a
# test that must fail with status 1. TIDDST only appears in the log message.
check_and_log_hs_isolation()
{
	local src=$1
	local src_tenant=$2
	local dst=$3
	local dst_tenant=$4
	local rc

	check_hs_connectivity ${src} ${dst} ${src_tenant}
	rc=$?

	# NOTE: ping should fail
	log_test ${rc} 1 "Hosts isolation: hs-t${src_tenant}-${src} -X-> hs-t${dst_tenant}-${dst}"
}
# check_and_log_hs2gw_connectivity HSSRC TID
#
# Ping the .254 address of the host network from host hs-tTID-HSSRC and log
# it as a test that must succeed.
check_and_log_hs2gw_connectivity()
{
	local src=$1
	local tenant=$2
	local rc

	check_hs_connectivity ${src} 254 ${tenant}
	rc=$?

	log_test ${rc} 0 "Hosts connectivity: hs-t${tenant}-${src} -> gw (tenant ${tenant})"
}
# Check the connectivity between the two routers, in both directions.
router_tests()
{
	local pair

	log_section "IPv6 routers connectivity test"

	# each entry is "src_router dst_router"
	for pair in "1 2" "2 1"; do
		check_and_log_rt_connectivity ${pair}
	done
}
# Check that every host reaches the gateway address of its own tenant.
host2gateway_tests()
{
	local entry

	log_section "IPv4 connectivity test among hosts and gateway"

	# each entry is "host tenant"
	for entry in "1 100" "2 100" "3 200" "4 200"; do
		check_and_log_hs2gw_connectivity ${entry}
	done
}
# Check the connectivity between the two hosts of each tenant, in both
# directions.
host_vpn_tests()
{
	local entry

	log_section "SRv6 VPN connectivity test among hosts in the same tenant"

	# each entry is "src_host dst_host tenant"
	for entry in "1 2 100" "2 1 100" "3 4 200" "4 3 200"; do
		check_and_log_hs_connectivity ${entry}
	done
}
# Check that no host reaches the hosts of the other tenant. Every host of
# tenant 100 probes every host of tenant 200, then the two groups are swapped
# and the probes are repeated in the opposite direction.
host_vpn_isolation_tests()
{
	local i
	local j
	local k
	local tmp
	local l1="1 2"
	local l2="3 4"
	local t1=100
	local t2=200

	log_section "SRv6 VPN isolation test among hosts in different tenants"

	# k only counts the two passes (forward and reverse)
	for k in 0 1; do
		for i in ${l1}; do
			for j in ${l2}; do
				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
			done
		done

		# let us test the reverse path
		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
		tmp=${t1}; t1=${t2}; t2=${tmp}
	done
}
# Kselftest framework requirement - SKIP code is 4. Exiting with 0 when a
# prerequisite is missing would report the skipped test as passed.
ksft_skip=4

if [ "$(id -u)" -ne 0 ]; then
	echo "SKIP: Need root privileges"
	exit ${ksft_skip}
fi

if [ ! -x "$(command -v ip)" ]; then
	echo "SKIP: Could not run test without ip tool"
	exit ${ksft_skip}
fi

# The strict_mode sysctl is checked after trying to load the vrf module.
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
	echo "SKIP: vrf sysctl does not exist"
	exit ${ksft_skip}
fi

# start from a clean state, whatever a previous run left behind
cleanup &>/dev/null

setup

router_tests
host2gateway_tests
host_vpn_tests
host_vpn_isolation_tests

print_log_test_results

cleanup &>/dev/null

# 0 when every test passed, 1 otherwise (see log_test)
exit ${ret}

View file

@ -0,0 +1,502 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# author: Andrea Mayer <andrea.mayer@uniroma2.it>
# author: Paolo Lungaroni <paolo.lungaroni@cnit.it>
# This test is designed for evaluating the new SRv6 End.DT6 behavior used for
# implementing IPv6 L3 VPN use cases.
#
# Hereafter a network diagram is shown, where two different tenants (named 100
# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each
# other across an IPv6 network.
#
# Only hosts belonging to the same tenant (and to the same VPN) can communicate
# with each other. Instead, the communication among hosts of different tenants
# is forbidden.
# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6
# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
# versa.
#
# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
# b) SRv6 End.DT6 behavior and c) VRF.
#
# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
# example where, within the same domain of tenant 100, the host hs-t100-1 pings
# the host hs-t100-2.
#
# First of all, L2 reachability of the host hs-t100-2 is taken into account by
# the router rt-1 which acts as an NDP proxy.
#
# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the
# router rt-1 receives the packet on the internal veth-t100 interface. Such
# interface is enslaved to the VRF vrf-100 whose associated table contains the
# SRv6 Encap route for encapsulating any IPv6 packet in an IPv6 plus the Segment
# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
# network up to the router rt-2 that receives it on veth0 interface.
#
# The rt-2 router uses the 'localsid' routing table to process incoming
# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and
# performs the lookup on the vrf-100 table using the destination address of
# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host
# hs-t100-2 through the veth-t100 interface.
#
# The ping response follows the same processing but this time the roles of rt-1
# and rt-2 are swapped.
#
# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN
# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
# connect with each other.
#
#
# +-------------------+                                   +-------------------+
# |                   |                                   |                   |
# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
# |                   |                                   |                   |
# |  +-------------+  |                                   |  +-------------+  |
# |  |    veth0    |  |                                   |  |    veth0    |  |
# |  | cafe::1/64  |  |                                   |  | cafe::2/64  |  |
# |  +-------------+  |                                   |  +-------------+  |
# |         .         |                                   |         .         |
# +-------------------+                                   +-------------------+
#           .                                                       .
#           .                                                       .
#           .                                                       .
# +-----------------------------------+   +-----------------------------------+
# |         .                         |   |                         .         |
# | +---------------+                 |   |                 +---------------+ |
# | |   veth-t100   |                 |   |                 |   veth-t100   | |
# | | cafe::254/64  |    +----------+ |   | +----------+    | cafe::254/64  | |
# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------+ |
# |         |            |   table  | |   | |   table  |            |         |
# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
# |    | vrf-100 |                    |   |                    | vrf-100 |    |
# |    +---------+     +------------+ |   | +------------+     +---------+    |
# |                    |   veth0    | |   | |   veth0    |                    |
# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
# |    +---------+     +------------+ |   | +------------+     +---------+    |
# |    | vrf-200 |                    |   |                    | vrf-200 |    |
# |    +----+----+                    |   |                    +----+----+    |
# |         |                         |   |                         |         |
# | +-------+-------+                 |   |                 +-------+-------+ |
# | |   veth-t200   |                 |   |                 |   veth-t200   | |
# | | cafe::254/64  |                 |   |                 | cafe::254/64  | |
# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------+ |
# |         .                         |   |                         .         |
# +-----------------------------------+   +-----------------------------------+
#           .                                                       .
#           .                                                       .
#           .                                                       .
#           .                                                       .
# +-------------------+                                   +-------------------+
# |         .         |                                   |         .         |
# |  +-------------+  |                                   |  +-------------+  |
# |  |    veth0    |  |                                   |  |    veth0    |  |
# |  | cafe::3/64  |  |                                   |  | cafe::4/64  |  |
# |  +-------------+  |                                   |  +-------------+  |
# |                   |                                   |                   |
# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
# |                   |                                   |                   |
# +-------------------+                                   +-------------------+
#
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~
# | Network configuration |
# ~~~~~~~~~~~~~~~~~~~~~~~~~
#
# rt-1: localsid table (table 90)
# +-------------------------------------------------+
# |SID              |Action                         |
# +-------------------------------------------------+
# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100|
# +-------------------------------------------------+
# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200|
# +-------------------------------------------------+
#
# rt-1: VRF tenant 100 (table 100)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |cafe::2    |apply seg6 encap segs fc00:12:100::6006|
# +---------------------------------------------------+
# |cafe::/64  |forward to dev veth-t100               |
# +---------------------------------------------------+
#
# rt-1: VRF tenant 200 (table 200)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |cafe::4    |apply seg6 encap segs fc00:12:200::6006|
# +---------------------------------------------------+
# |cafe::/64  |forward to dev veth-t200               |
# +---------------------------------------------------+
#
#
# rt-2: localsid table (table 90)
# +-------------------------------------------------+
# |SID              |Action                         |
# +-------------------------------------------------+
# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100|
# +-------------------------------------------------+
# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200|
# +-------------------------------------------------+
#
# rt-2: VRF tenant 100 (table 100)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |cafe::1    |apply seg6 encap segs fc00:21:100::6006|
# +---------------------------------------------------+
# |cafe::/64  |forward to dev veth-t100               |
# +---------------------------------------------------+
#
# rt-2: VRF tenant 200 (table 200)
# +---------------------------------------------------+
# |host       |Action                                 |
# +---------------------------------------------------+
# |cafe::3    |apply seg6 encap segs fc00:21:200::6006|
# +---------------------------------------------------+
# |cafe::/64  |forward to dev veth-t200               |
# +---------------------------------------------------+
#
# Routing table that collects the SRv6 End.DT6 (localsid) routes; the
# fc00::/16 policy rule added by setup_vpn_config points to it.
readonly LOCALSID_TABLE_ID=90

# Address prefixes: IPv6 network between the routers, IPv6 network of the
# hosts and common locator of every VPN SID.
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
readonly VPN_LOCATOR_SERVICE=fc00

# Value passed to "ping -W" for the host-to-host checks.
PING_TIMEOUT_SEC=4

# Overall exit status (log_test sets it to 1 on a failure) and the pass/fail
# counters. The counters are zeroed explicitly so that the final summary does
# not depend on unset variables or on values inherited from the environment.
ret=0
nsuccess=0
nfail=0

# Set PAUSE_ON_FAIL=yes in the environment to stop after each failed test.
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
# log_test RC EXPECTED MSG
#
# Record the outcome of one test case, which passes when the numeric status
# RC equals EXPECTED. Updates the global nsuccess/nfail counters, sets the
# global ret to 1 on a failure and prints a one-line report for MSG.
# With PAUSE_ON_FAIL=yes a failure waits for a line on stdin; "q" makes the
# script exit with status 1.
log_test()
{
	local actual=$1
	local wanted=$2
	local descr="$3"

	if [ ${actual} -eq ${wanted} ]; then
		nsuccess=$((nsuccess+1))
		printf "\n TEST: %-60s [ OK ]\n" "${descr}"
		return
	fi

	ret=1
	nfail=$((nfail+1))
	printf "\n TEST: %-60s [FAIL]\n" "${descr}"

	if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
		echo
		echo "hit enter to continue, 'q' to quit"
		read a
		[ "$a" = "q" ] && exit 1
	fi
}
# Print the pass/fail summary taken from the global nsuccess/nfail counters.
# Nothing is printed when the variable TESTS is set to "none".
print_log_test_results()
{
	if [ "$TESTS" = "none" ]; then
		return 0
	fi

	printf "\nTests passed: %3d\n" ${nsuccess}
	printf "Tests failed: %3d\n" ${nfail}
}
# log_section TITLE...
#
# Print a banner that introduces a group of tests: an empty line followed by
# the title framed by two rules of '#' characters.
log_section()
{
	local rule="################################################################################"

	printf '\n%s\n' "${rule}"
	printf '%s\n' "TEST SECTION: $*"
	printf '%s\n' "${rule}"
}
# Remove what a (possibly interrupted) run leaves behind: the veth pair that
# links the two routers and the rt-*/hs-* network namespaces.
# Every step is best effort, so the function can also run on a clean system.
cleanup()
{
	local ns

	ip link del veth-rt-1 2>/dev/null || true
	ip link del veth-rt-2 2>/dev/null || true

	# destroy routers rt-* and hosts hs-*
	#
	# "ip netns show" may append "(id: N)" to a name, so only the first
	# field of each line is used. The match is anchored on the "rt-" and
	# "hs-" prefixes: an unanchored 'rt-*|hs-*' regex would also select
	# every namespace whose name merely contains "rt" or "hs".
	for ns in $(ip netns show | awk '$1 ~ /^(rt|hs)-/ { print $1 }'); do
		ip netns del ${ns} || true
	done
}
# setup_rt_networking RT
#
# Create the network namespace rt-RT, move the veth-rt-RT end of the core
# link into it as veth0 with address ${IPv6_RT_NETWORK}::RT/64 (accept_dad
# set to 0, address added with nodad), and enable IPv6 forwarding inside the
# namespace.
setup_rt_networking()
{
	local id=$1
	local router=rt-${id}
	local core_if=veth-rt-${id}
	local scope
	local dev

	ip netns add ${router}

	# hand this router its end of the core veth pair and rename it
	ip link set ${core_if} netns ${router}
	ip -netns ${router} link set ${core_if} name veth0

	for scope in all default; do
		ip netns exec ${router} sysctl -wq net.ipv6.conf.${scope}.accept_dad=0
	done

	ip -netns ${router} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
	for dev in veth0 lo; do
		ip -netns ${router} link set ${dev} up
	done

	ip netns exec ${router} sysctl -wq net.ipv6.conf.all.forwarding=1
}
# setup_hs HS RT TID
#
# Create the host namespace hs-tTID-HS, connect it to router rt-RT through a
# veth pair and enslave the router end of the pair (veth-tTID) to the VRF
# vrf-TID, which is bound to routing table TID.
setup_hs()
{
	local host_id=$1
	local router_id=$2
	local tenant=$3
	local host_ns=hs-t${tenant}-${host_id}
	local router_ns=rt-${router_id}
	local access_if=veth-t${tenant}
	local vrf_if=vrf-${tenant}
	local scope
	local dev

	# host side: namespace, veth pair (peer handed to the router), address
	ip netns add ${host_ns}
	for scope in all default; do
		ip netns exec ${host_ns} sysctl -wq net.ipv6.conf.${scope}.accept_dad=0
	done
	ip -netns ${host_ns} link add veth0 type veth peer name ${access_if}
	ip -netns ${host_ns} link set ${access_if} netns ${router_ns}
	ip -netns ${host_ns} addr add ${IPv6_HS_NETWORK}::${host_id}/64 dev veth0 nodad
	for dev in veth0 lo; do
		ip -netns ${host_ns} link set ${dev} up
	done

	# router side: the VRF of the tenant
	ip -netns ${router_ns} link add ${vrf_if} type vrf table ${tenant}
	ip -netns ${router_ns} link set ${vrf_if} up
	for scope in all default; do
		ip netns exec ${router_ns} sysctl -wq net.ipv6.conf.${scope}.accept_dad=0
	done

	# enslave the access interface to the VRF and assign it the ::254
	# address of the host network
	ip -netns ${router_ns} link set ${access_if} master ${vrf_if}
	ip -netns ${router_ns} addr add ${IPv6_HS_NETWORK}::254/64 dev ${access_if} nodad
	ip -netns ${router_ns} link set ${access_if} up

	ip netns exec ${router_ns} sysctl -wq net.ipv6.conf.${access_if}.proxy_ndp=1

	ip netns exec ${router_ns} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
# setup_vpn_config HSSRC RTSRC HSDST RTDST TID
#
# Configure one direction of the IPv6 L3 VPN of tenant TID, i.e. the traffic
# sent to host HSDST (attached to router rt-RTDST) that enters the VPN at
# router rt-RTSRC. The SID of this direction is
# ${VPN_LOCATOR_SERVICE}:<HSSRC><HSDST>:<TID>::6006.
setup_vpn_config()
{
	local hssrc=$1
	local rtsrc=$2
	local hsdst=$3
	local rtdst=$4
	local tid=$5
	local rtsrc_name=rt-${rtsrc}
	local rtdst_name=rt-${rtdst}
	local rtveth=veth-t${tid}
	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
	local sid_rule="to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}"

	# ingress router: proxy neighbour entry for the address of hsdst on
	# the access interface of the tenant.
	ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}

	# ingress router: encap route for the packets destined to hsdst, plus
	# the route that carries the SID towards the egress router over the
	# core network.
	ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
		encap seg6 mode encap segs ${vpn_sid} dev veth0
	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
		via ${IPv6_RT_NETWORK}::${rtdst} dev veth0

	# egress router: decap route (SRv6 End.DT6) that hands the inner IPv6
	# packet to the lookup in the table of the tenant.
	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
		encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid}

	# all sids for VPNs start with a common locator which is fc00::/16.
	# Routes for handling the SRv6 End.DT6 behavior instances are grouped
	# together in the 'localsid' table.
	#
	# NOTE: added only once
	if ! ip -netns ${rtdst_name} -6 rule show | grep -q "${sid_rule}"; then
		ip -netns ${rtdst_name} -6 rule add \
			to ${VPN_LOCATOR_SERVICE}::/16 \
			lookup ${LOCALSID_TABLE_ID} prio 999
	fi
}
# Build the whole test topology: the veth pair between the routers, the two
# routers, the four hosts and the two IPv6 L3 VPNs (one for each tenant).
setup()
{
	local rt
	local spec

	ip link add veth-rt-1 type veth peer name veth-rt-2

	# routers rt-1 and rt-2
	for rt in 1 2; do
		setup_rt_networking ${rt}
	done

	# hosts, given as "host router tenant":
	# - tenant 100: hs-1 behind rt-1 and hs-2 behind rt-2;
	# - tenant 200: hs-3 behind rt-1 and hs-4 behind rt-2.
	for spec in "1 1 100" "2 2 100" "3 1 200" "4 2 200"; do
		setup_hs ${spec}
	done

	# VPNs, given as "src_host src_router dst_host dst_router tenant":
	# both directions between hs-t100-1 and hs-t100-2, then both
	# directions between hs-t200-3 and hs-t200-4.
	for spec in "1 1 2 2 100" "2 2 1 1 100" "3 1 4 2 200" "4 2 3 1 200"; do
		setup_vpn_config ${spec}
	done
}
# check_rt_connectivity RTSRC RTDST
#
# Send one ping (1 second timeout) from router rt-RTSRC to the core address
# of router RTDST. All output is discarded; the exit status is the one of
# the ping command line.
check_rt_connectivity()
{
	local src=$1
	local dst=$2
	local target=${IPv6_RT_NETWORK}::${dst}

	ip netns exec rt-${src} ping -c 1 -W 1 ${target} &>/dev/null
}
# check_and_log_rt_connectivity RTSRC RTDST
#
# Run the router-to-router ping and log it as a test that must succeed.
check_and_log_rt_connectivity()
{
	local src=$1
	local dst=$2
	local rc

	check_rt_connectivity ${src} ${dst}
	rc=$?

	log_test ${rc} 0 "Routers connectivity: rt-${src} -> rt-${dst}"
}
# check_hs_connectivity HSSRC HSDST TID
#
# Send one ping (timeout PING_TIMEOUT_SEC) from host hs-tTID-HSSRC to the
# address ${IPv6_HS_NETWORK}::HSDST. All output is discarded; the exit status
# is the one of the ping command line.
check_hs_connectivity()
{
	local src=$1
	local dst=$2
	local tenant=$3
	local src_ns=hs-t${tenant}-${src}
	local target=${IPv6_HS_NETWORK}::${dst}

	ip netns exec ${src_ns} ping -c 1 -W ${PING_TIMEOUT_SEC} ${target} &>/dev/null
}
# check_and_log_hs_connectivity HSSRC HSDST TID
#
# Ping host HSDST from host HSSRC inside tenant TID and log it as a test
# that must succeed.
check_and_log_hs_connectivity()
{
	local src=$1
	local dst=$2
	local tenant=$3
	local rc

	check_hs_connectivity ${src} ${dst} ${tenant}
	rc=$?

	log_test ${rc} 0 "Hosts connectivity: hs-t${tenant}-${src} -> hs-t${tenant}-${dst} (tenant ${tenant})"
}
# check_and_log_hs_isolation HSSRC TIDSRC HSDST TIDDST
#
# Ping the address of host HSDST from host hs-tTIDSRC-HSSRC and log it as a
# test that must fail with status 1. TIDDST only appears in the log message.
check_and_log_hs_isolation()
{
	local src=$1
	local src_tenant=$2
	local dst=$3
	local dst_tenant=$4
	local rc

	check_hs_connectivity ${src} ${dst} ${src_tenant}
	rc=$?

	# NOTE: ping should fail
	log_test ${rc} 1 "Hosts isolation: hs-t${src_tenant}-${src} -X-> hs-t${dst_tenant}-${dst}"
}
# check_and_log_hs2gw_connectivity HSSRC TID
#
# Ping the ::254 address of the host network from host hs-tTID-HSSRC and log
# it as a test that must succeed.
check_and_log_hs2gw_connectivity()
{
	local src=$1
	local tenant=$2
	local rc

	check_hs_connectivity ${src} 254 ${tenant}
	rc=$?

	log_test ${rc} 0 "Hosts connectivity: hs-t${tenant}-${src} -> gw (tenant ${tenant})"
}
# Check the connectivity between the two routers, in both directions.
router_tests()
{
	local pair

	log_section "IPv6 routers connectivity test"

	# each entry is "src_router dst_router"
	for pair in "1 2" "2 1"; do
		check_and_log_rt_connectivity ${pair}
	done
}
# Check that every host reaches the gateway address of its own tenant.
host2gateway_tests()
{
	local entry

	log_section "IPv6 connectivity test among hosts and gateway"

	# each entry is "host tenant"
	for entry in "1 100" "2 100" "3 200" "4 200"; do
		check_and_log_hs2gw_connectivity ${entry}
	done
}
# Check the connectivity between the two hosts of each tenant, in both
# directions.
host_vpn_tests()
{
	local entry

	log_section "SRv6 VPN connectivity test among hosts in the same tenant"

	# each entry is "src_host dst_host tenant"
	for entry in "1 2 100" "2 1 100" "3 4 200" "4 3 200"; do
		check_and_log_hs_connectivity ${entry}
	done
}
# Check that no host reaches the hosts of the other tenant. Every host of
# tenant 100 probes every host of tenant 200, then the two groups are swapped
# and the probes are repeated in the opposite direction.
host_vpn_isolation_tests()
{
	local i
	local j
	local k
	local tmp
	local l1="1 2"
	local l2="3 4"
	local t1=100
	local t2=200

	log_section "SRv6 VPN isolation test among hosts in different tenants"

	# k only counts the two passes (forward and reverse)
	for k in 0 1; do
		for i in ${l1}; do
			for j in ${l2}; do
				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
			done
		done

		# let us test the reverse path
		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
		tmp=${t1}; t1=${t2}; t2=${tmp}
	done
}
# Kselftest framework requirement - SKIP code is 4. Exiting with 0 when a
# prerequisite is missing would report the skipped test as passed.
ksft_skip=4

if [ "$(id -u)" -ne 0 ]; then
	echo "SKIP: Need root privileges"
	exit ${ksft_skip}
fi

if [ ! -x "$(command -v ip)" ]; then
	echo "SKIP: Could not run test without ip tool"
	exit ${ksft_skip}
fi

# The strict_mode sysctl is checked after trying to load the vrf module.
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
	echo "SKIP: vrf sysctl does not exist"
	exit ${ksft_skip}
fi

# start from a clean state, whatever a previous run left behind
cleanup &>/dev/null

setup

router_tests
host2gateway_tests
host_vpn_tests
host_vpn_isolation_tests

print_log_test_results

cleanup &>/dev/null

# 0 when every test passed, 1 otherwise (see log_test)
exit ${ret}