Introduce nexthop objects and new routing KPI.

This is the foundational change for the routing subsystem rearchitecture.
 More details and goals are available in https://reviews.freebsd.org/D24141 .

This patch introduces concept of nexthop objects and new nexthop-based
 routing KPI.

Nexthops are objects, containing all necessary information for performing
 the packet output decision. Output interface, mtu, flags, gw address goes
 there. For most of the cases, these objects will serve the same role as
 the struct rtentry is currently serving.
Typically there will be low tens of such objects for the router even with
 multiple BGP full-views, as these objects will be shared between routing
 entries. This allows to store more information in the nexthop.

New KPI:

struct nhop_object *fib4_lookup(uint32_t fibnum, struct in_addr dst,
  uint32_t scopeid, uint32_t flags, uint32_t flowid);
struct nhop_object *fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
  uint32_t scopeid, uint32_t flags, uint32_t flowid);

These 2 functions are intended to replace all flavours of
 <in_|in6_>rtalloc[1]<_ign><_fib>, mpath functions  and the previous
 fib[46]-generation functions.

Upon successful lookup, they return nexthop object which is guaranteed to
 exist within current NET_EPOCH. If longer lifetime is desired, one can
 specify NHR_REF as a flag and get a referenced version of the nexthop.
 Reference semantic closely resembles rtentry one, allowing sed-style conversion.

Additionally, another 2 functions are introduced to support uRPF functionality
 inside variety of our firewalls. Their primary goal is to hide the multipath
 implementation details inside the routing subsystem, greatly simplifying
 firewalls implementation:

int fib4_lookup_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
  uint32_t flags, const struct ifnet *src_if);
int fib6_lookup_urpf(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid,
  uint32_t flags, const struct ifnet *src_if);

All functions have a separate scopeid argument, paving way to eliminating IPv6 scope
 embedding and allowing to support IPv4 link-locals in the future.

Structure changes:
 * rtentry gets new 'rt_nhop' pointer, slightly growing the overall size.
 * rib_head gets new 'rnh_preadd' callback pointer, slightly growing overall sz.

Old KPI:
During the transition state old and new KPI will coexist. As there are another 4-5
 decent-sized conversion patches, it will probably take a couple of weeks.
To support both KPIs, fields not required by the new KPI (most of rtentry) have to be
 kept, resulting in the temporary size increase.
Once conversion is finished, rtentry will notably shrink.

More details:
* architectural overview: https://reviews.freebsd.org/D24141
* list of the next changes: https://reviews.freebsd.org/D24232

Reviewed by:	ae,glebius(initial version)
Differential Revision:	https://reviews.freebsd.org/D24232
This commit is contained in:
Alexander V. Chernikov 2020-04-12 14:30:00 +00:00
parent 07ddae2822
commit a666325282
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=359823
33 changed files with 3471 additions and 176 deletions

View file

@ -208,6 +208,8 @@
net
altq
..
route
..
..
net80211
..

View file

@ -53,6 +53,7 @@ LSUBDIRS= cam/ata cam/mmc cam/nvme cam/scsi \
geom/mirror geom/mountver geom/multipath geom/nop \
geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
net/altq \
net/route \
netgraph/atm netgraph/netflow \
netinet/cc \
netinet/netdump \

View file

@ -563,6 +563,7 @@ The fifth, sixth, and seventh level names are as follows:
.It Dv NET_RT_IFLIST Ta 0 or if_index Ta None
.It Dv NET_RT_IFMALIST Ta 0 or if_index Ta None
.It Dv NET_RT_IFLISTL Ta 0 or if_index Ta None
.It Dv NET_RT_NHOPS Ta None Ta fib number
.El
.Pp
The
@ -583,6 +584,9 @@ uses 'l' versions of the message header structures:
.Va struct if_msghdrl
and
.Va struct ifa_msghdrl .
.Pp
.Dv NET_RT_NHOPS
returns all nexthops for specified address family in given fib.
.It Li PF_INET
Get or set various global information about the IPv4
(Internet Protocol version 4).

View file

@ -4091,6 +4091,11 @@ net/raw_cb.c standard
net/raw_usrreq.c standard
net/route.c standard
net/route_temporal.c standard
net/route/nhop.c standard
net/route/nhop_ctl.c standard
net/route/nhop_utils.c standard
net/route/route_ctl.c standard
net/route/route_helpers.c standard
net/rss_config.c optional inet rss | inet6 rss
net/rtsock.c standard
net/slcompress.c optional netgraph_vjc | sppp | \

View file

@ -211,7 +211,7 @@ rt_mpath_conflict(struct rib_head *rnh, struct rtentry *rt,
return (0);
}
static struct rtentry *
struct rtentry *
rt_mpath_selectrte(struct rtentry *rte, uint32_t hash)
{
struct radix_node *rn0, *rn;

View file

@ -56,10 +56,27 @@ int rt_mpath_conflict(struct rib_head *, struct rtentry *,
struct sockaddr *);
void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
struct rtentry *rt_mpath_select(struct rtentry *, uint32_t);
struct rtentry *rt_mpath_selectrte(struct rtentry *, uint32_t);
int rt_mpath_deldup(struct rtentry *, struct rtentry *);
int rn4_mpath_inithead(void **, int, u_int);
int rn6_mpath_inithead(void **, int, u_int);
/*
 * Returns the next multipath sibling of @rt: the following entry on the
 * duplicated-key chain, provided it shares the same netmask as @rt.
 * Returns NULL when there is no such entry.
 */
static inline struct rtentry *
rt_mpath_next(struct rtentry *rt)
{
	struct radix_node *cur, *dup;

	cur = (struct radix_node *)rt;
	dup = cur->rn_dupedkey;

	/* End of the chain, or the next duplicate belongs to another mask. */
	if (dup == NULL || cur->rn_mask != dup->rn_mask)
		return (NULL);

	return ((struct rtentry *)dup);
}
#endif
#endif /* _NET_RADIX_MPATH_H_ */

View file

@ -62,6 +62,8 @@
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@ -108,10 +110,7 @@ VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
&VNET_NAME(rt_add_addr_allfibs), 0, "");
VNET_PCPUSTAT_DEFINE_STATIC(struct rtstat, rtstat);
#define RTSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct rtstat, rtstat, name, (val))
#define RTSTAT_INC(name) RTSTAT_ADD(name, 1)
VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat);
VNET_PCPUSTAT_SYSINIT(rtstat);
#ifdef VIMAGE
@ -240,6 +239,7 @@ route_init(void)
rt_numfibs = RT_MAXFIBS;
if (rt_numfibs == 0)
rt_numfibs = 1;
nhops_init();
}
SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL);
@ -377,6 +377,8 @@ rt_table_init(int offset, int family, u_int fibnum)
/* Init locks */
RIB_LOCK_INIT(rh);
nhops_init_rib(rh);
/* Finally, set base callbacks */
rh->rnh_addaddr = rn_addroute;
rh->rnh_deladdr = rn_delete;
@ -408,6 +410,8 @@ rt_table_destroy(struct rib_head *rh)
rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
nhops_destroy_rib(rh);
/* Assume table is already empty */
RIB_LOCK_DESTROY(rh);
free(rh, M_RTABLE);
@ -586,6 +590,9 @@ rtfree(struct rtentry *rt)
*/
R_Free(rt_key(rt));
/* Unreference nexthop */
nhop_free(rt->rt_nhop);
/*
* and the rtentry itself of course
*/
@ -1400,6 +1407,7 @@ rt_updatemtu(struct ifnet *ifp)
RIB_WLOCK(rnh);
rnh->rnh_walktree(&rnh->head, if_updatemtu_cb, &ifmtu);
RIB_WUNLOCK(rnh);
nhops_update_ifmtu(rnh, ifp, ifmtu.mtu);
}
}
}
@ -1544,6 +1552,7 @@ int
rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
u_int fibnum)
{
struct epoch_tracker et;
const struct sockaddr *dst;
struct rib_head *rnh;
int error;
@ -1592,9 +1601,11 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
error = add_route(rnh, info, ret_nrt);
break;
case RTM_CHANGE:
NET_EPOCH_ENTER(et);
RIB_WLOCK(rnh);
error = change_route(rnh, info, ret_nrt);
RIB_WUNLOCK(rnh);
NET_EPOCH_EXIT(et);
break;
default:
error = EOPNOTSUPP;
@ -1609,9 +1620,11 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
{
struct sockaddr *dst, *ndst, *gateway, *netmask;
struct rtentry *rt, *rt_old;
struct nhop_object *nh;
struct radix_node *rn;
struct ifaddr *ifa;
int error, flags;
struct epoch_tracker et;
dst = info->rti_info[RTAX_DST];
gateway = info->rti_info[RTAX_GATEWAY];
@ -1631,18 +1644,30 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
} else {
ifa_ref(info->rti_ifa);
}
NET_EPOCH_ENTER(et);
error = nhop_create_from_info(rnh, info, &nh);
NET_EPOCH_EXIT(et);
if (error != 0) {
ifa_free(info->rti_ifa);
return (error);
}
rt = uma_zalloc(V_rtzone, M_NOWAIT);
if (rt == NULL) {
ifa_free(info->rti_ifa);
nhop_free(nh);
return (ENOBUFS);
}
rt->rt_flags = RTF_UP | flags;
rt->rt_fibnum = rnh->rib_fibnum;
rt->rt_nhop = nh;
/*
* Add the gateway. Possibly re-malloc-ing the storage for it.
*/
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
ifa_free(info->rti_ifa);
nhop_free(nh);
uma_zfree(V_rtzone, rt);
return (error);
}
@ -1682,6 +1707,7 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
ifa_free(rt->rt_ifa);
R_Free(rt_key(rt));
nhop_free(nh);
uma_zfree(V_rtzone, rt);
return (EEXIST);
}
@ -1723,6 +1749,7 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
if (rn == NULL) {
ifa_free(rt->rt_ifa);
R_Free(rt_key(rt));
nhop_free(nh);
uma_zfree(V_rtzone, rt);
return (EEXIST);
}
@ -1802,6 +1829,7 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
int error = 0;
int free_ifa = 0;
int family, mtu;
struct nhop_object *nh;
struct if_mtuinfo ifmtu;
RIB_WLOCK_ASSERT(rnh);
@ -1824,6 +1852,7 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
}
#endif
nh = NULL;
RT_LOCK(rt);
rt_setmetrics(info, rt);
@ -1852,6 +1881,10 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
goto bad;
}
error = nhop_create_from_nhop(rnh, rt->rt_nhop, info, &nh);
if (error != 0)
goto bad;
/* Check if outgoing interface has changed */
if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa &&
rt->rt_ifa != NULL) {
@ -1897,6 +1930,11 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
}
}
/* Update nexthop */
nhop_free(rt->rt_nhop);
rt->rt_nhop = nh;
nh = NULL;
/*
* This route change may have modified the route's gateway. In that
* case, any inpcbs that have cached this route need to invalidate their
@ -1910,6 +1948,8 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
}
bad:
RT_UNLOCK(rt);
if (nh != NULL)
nhop_free(nh);
if (free_ifa != 0) {
ifa_free(info->rti_ifa);
info->rti_ifa = NULL;

View file

@ -90,7 +90,8 @@ struct rt_metrics {
u_long rmx_rttvar; /* estimated rtt variance */
u_long rmx_pksent; /* packets sent using this route */
u_long rmx_weight; /* route weight */
u_long rmx_filler[3]; /* will be used for T/TCP later */
u_long rmx_nhidx; /* route nexhop index */
u_long rmx_filler[2]; /* will be used for T/TCP later */
};
/*
@ -150,6 +151,7 @@ struct rtentry {
struct sockaddr *rt_gateway; /* value */
struct ifnet *rt_ifp; /* the answer: interface to use */
struct ifaddr *rt_ifa; /* the answer: interface address to use */
struct nhop_object *rt_nhop; /* nexthop data */
int rt_flags; /* up/down?, host/net */
int rt_refcnt; /* # held references */
u_int rt_fibnum; /* which FIB */
@ -215,9 +217,13 @@ struct rtentry {
#define NHF_HOST 0x0400 /* RTF_HOST */
/* Nexthop request flags */
#define NHR_NONE 0x00 /* empty flags field */
#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
#define NHR_REF 0x02 /* For future use */
/* uRPF */
#define NHR_NODEFAULT 0x04 /* do not consider default route */
/* Control plane route request flags */
#define NHR_COPY 0x100 /* Copy rte data */
@ -245,6 +251,8 @@ struct rtstat {
uint64_t rts_newgateway; /* routes modified by redirects */
uint64_t rts_unreach; /* lookups which failed */
uint64_t rts_wildcard; /* lookups satisfied by a wildcard */
uint64_t rts_nh_idx_alloc_failure; /* nexthop index alloc failure*/
uint64_t rts_nh_alloc_failure; /* nexthop allocation failure*/
};
/*
@ -507,6 +515,8 @@ int rib_add_redirect(u_int fibnum, struct sockaddr *dst,
struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp,
int flags, int expire_sec);
/* New API */
void rib_walk(int af, u_int fibnum, rt_walktree_f_t *wa_f, void *arg);
#endif
#endif

388
sys/net/route/nhop.c Normal file
View file

@ -0,0 +1,388 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/shared.h>
#include <net/vnet.h>
/*
* This file contains data structures management logic for the nexthop ("nhop")
* route subsystem.
*
* Nexthops in the original sense are the objects containing all the necessary
* information to forward the packet to the selected destination.
* In particular, nexthop is defined by a combination of
* ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
* NHF_DEFAULT
*
* All nexthops are stored in the resizable hash table.
* Additionally, each nexthop gets assigned its unique index (nexthop index)
* so userland programs can interact with the nexthops easier. Index allocation
* is backed by the bitmask array.
*/
static MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
/* Hash management functions */
/*
 * Allocates and attaches the nexthop control structure for @rh:
 * the nexthop hash table, the nexthop index bitmask and the lock.
 * All allocations use M_WAITOK. Always returns 0.
 */
int
nhops_init_rib(struct rib_head *rh)
{
	struct nh_control *ctl;
	void *hash_mem, *idx_mem;
	uint32_t hash_buckets, idx_items;
	size_t hash_bytes;

	ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO);

	/*
	 * Nexthop hash: 16 buckets (128 bytes) to begin with, which is
	 * expected to be sufficient for most setups.
	 */
	hash_buckets = 16;
	hash_bytes = CHT_SLIST_GET_RESIZE_SIZE(hash_buckets);
	hash_mem = malloc(hash_bytes, M_NHOP, M_WAITOK | M_ZERO);
	CHT_SLIST_INIT(&ctl->nh_head, hash_mem, hash_buckets);

	/* Nexthop index bitmask: 128 bytes worth of bits. */
	idx_items = 128 * 8;
	idx_mem = malloc(bitmask_get_size(idx_items), M_NHOP,
	    M_WAITOK | M_ZERO);
	bitmask_init(&ctl->nh_idx_head, idx_mem, idx_items);

	NHOPS_LOCK_INIT(ctl);

	/* Cross-link the rib head and its nexthop control structure. */
	rh->nh_control = ctl;
	ctl->ctl_rh = rh;

	DPRINTF("NHOPS init for fib %u af %u: ctl %p rh %p", rh->rib_fibnum,
	    rh->rib_family, ctl, rh);

	return (0);
}
/*
 * Releases everything owned by @ctl: its lock, the hash table storage,
 * the index bitmask storage and finally the control structure itself.
 */
static void
destroy_ctl(struct nh_control *ctl)
{
	void *hash_mem, *idx_mem;

	hash_mem = ctl->nh_head.ptr;
	idx_mem = ctl->nh_idx_head.idx;

	NHOPS_LOCK_DESTROY(ctl);
	free(hash_mem, M_NHOP);
	free(idx_mem, M_NHOP);
	free(ctl, M_NHOP);
}
/*
 * Epoch callback: invoked once it is safe to tear down the nexthop
 * control structure that embeds @ctx as its ctl_epoch_ctx member.
 */
static void
destroy_ctl_epoch(epoch_context_t ctx)
{

	destroy_ctl(__containerof(ctx, struct nh_control, ctl_epoch_ctx));
}
/*
* Tears down the nexthop control structure attached to @rh.
*
* Every nexthop still present in the hash gets one reference dropped from
* its nh_linked counter, and the control structure itself is handed to
* epoch_call() so that it is destroyed by destroy_ctl_epoch() later.
*/
void
nhops_destroy_rib(struct rib_head *rh)
{
struct nh_control *ctl;
struct nhop_priv *nh_priv;
ctl = rh->nh_control;
/*
* All routes should have been deleted in rt_table_destroy().
* However, TCP stack or other consumers may store referenced
* nexthop pointers. When these references go to zero,
* nhop_free() will try to unlink these records from the
* datastructures, most likely leading to panic.
*
* Avoid that by explicitly marking all of the remaining
* nexthops as unlinked by removing a reference from a special
* counter. Please see nhop_free() comments for more
* details.
*/
NHOPS_WLOCK(ctl);
CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
DPRINTF("Marking nhop %u unlinked", nh_priv->nh_idx);
/* The return value of refcount_release() is not examined here. */
refcount_release(&nh_priv->nh_linked);
} CHT_SLIST_FOREACH_END;
NHOPS_WUNLOCK(ctl);
/*
* Postpone destruction till the end of current epoch
* so nhop_free() can safely use nh_control pointer.
*/
epoch_call(net_epoch_preempt, destroy_ctl_epoch,
&ctl->ctl_epoch_ctx);
}
/*
* Nexthop hash calculation:
*
* Nexthops distribution:
* 2 "mandatory" nexthops per interface ("interface route", "loopback").
* For direct peering: 1 nexthop for the peering router per ifp/af.
* For Ix-like peering: tens to hundreds nexthops of neighbors per ifp/af.
* IGP control plane & broadcast segment: tens of nexthops per ifp/af.
*
* Each fib/af combination has its own hash table.
* With that in mind, hash nexthops by the combination of the interface
* and GW IP address.
*
* To optimize hash calculation, ignore higher bytes of ifindex, as they
* give very little entropy.
* Similarly, use lower 4 bytes of IPv6 address to distinguish between the
* neighbors.
*/
/*
* Fixed-size key that hash_priv() fills in and feeds to djb_hash().
* hash_priv() zeroes the whole structure before setting the fields.
*/
struct _hash_data {
uint16_t ifindex; /* low 16 bits of the transmit interface index */
uint8_t family; /* address family of the gateway sockaddr */
uint8_t nh_type; /* low 8 bits of the nexthop type */
uint32_t gw_addr; /* IPv4 gateway, or last 4 bytes of the IPv6 gateway */
};
/*
 * Multiply-by-33-then-xor hash over @len bytes starting at @h.
 * The accumulator starts at 0; a non-positive @len yields 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cur;
	unsigned int acc;
	int left;

	acc = 0;
	cur = h;
	for (left = len; left > 0; left--) {
		/* Multiplication binds tighter than xor: (acc * 33) ^ byte. */
		acc = (acc * 33) ^ *cur;
		cur++;
	}

	return (acc);
}
static uint32_t
hash_priv(const struct nhop_priv *priv)
{
struct nhop_object *nh;
uint16_t ifindex;
struct _hash_data key;
nh = priv->nh;
ifindex = nh->nh_ifp->if_index & 0xFFFF;
memset(&key, 0, sizeof(key));
key.ifindex = ifindex;
key.family = nh->gw_sa.sa_family;
key.nh_type = priv->nh_type & 0xFF;
if (nh->gw_sa.sa_family == AF_INET6)
memcpy(&key.gw_addr, &nh->gw6_sa.sin6_addr.s6_addr32[3], 4);
else if (nh->gw_sa.sa_family == AF_INET)
memcpy(&key.gw_addr, &nh->gw4_sa.sin_addr, 4);
return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
}
/*
 * Resizes the nexthop hash and/or the nexthop index bitmask of @ctl.
 *
 * @new_nh_buckets: desired number of hash buckets, 0 to leave the hash as is.
 * @new_idx_items: desired number of index items, 0 to leave the bitmask as is.
 *
 * Must be called without the nhops lock held: the new storage is allocated
 * first (M_NOWAIT), then the lock is taken only for the actual switch-over.
 * Allocation failures are not reported; the corresponding resize is simply
 * skipped.
 */
static void
consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
{
	void *nh_ptr, *nh_idx_ptr;
	void *old_idx_ptr;
	size_t alloc_size;

	nh_ptr = NULL;
	if (new_nh_buckets != 0) {
		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
		nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
	}

	nh_idx_ptr = NULL;
	if (new_idx_items != 0) {
		alloc_size = bitmask_get_size(new_idx_items);
		nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
	}

	if (nh_ptr == NULL && nh_idx_ptr == NULL) {
		/* Either resize is not required or allocations have failed. */
		return;
	}

	DPRINTF("going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", nh_ptr,
	    new_nh_buckets, nh_idx_ptr, new_idx_items);

	old_idx_ptr = NULL;

	NHOPS_WLOCK(ctl);
	if (nh_ptr != NULL) {
		/*
		 * NOTE(review): nh_ptr is freed below, so the macro is
		 * expected to leave the previous storage in nh_ptr.
		 */
		CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets);
	}
	if (nh_idx_ptr != NULL) {
		if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0) {
			bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr,
			    new_idx_items, &old_idx_ptr);
		} else {
			/*
			 * The copy was refused, so the new buffer was never
			 * installed. Schedule it for release instead of
			 * leaking it.
			 * NOTE(review): assumes bitmask_copy() does not keep
			 * the buffer on failure - confirm in nhop_utils.c.
			 */
			old_idx_ptr = nh_idx_ptr;
		}
	}
	NHOPS_WUNLOCK(ctl);

	/* Release the replaced (or unused) storage outside of the lock. */
	if (nh_ptr != NULL)
		free(nh_ptr, M_NHOP);
	if (old_idx_ptr != NULL)
		free(old_idx_ptr, M_NHOP);
}
/*
 * Links nexthop @nh_priv to the nexthop hash table of @ctl and allocates
 * a nexthop index for it.
 *
 * Returns the allocated index, or 0 if no index could be allocated.
 * In both cases a hash/index resize is attempted afterwards, based on
 * the sizes sampled while the lock was held.
 */
int
link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
{
	uint32_t want_buckets, want_items;
	uint16_t new_idx;
	int alloc_failed;

	KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated"));

	NHOPS_WLOCK(ctl);
	/*
	 * Sample the resize targets first: both helpers return either
	 * the new size or 0 when no resize is needed.
	 */
	want_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
	want_items = bitmask_get_resize_items(&ctl->nh_idx_head);

	alloc_failed = (bitmask_alloc_idx(&ctl->nh_idx_head, &new_idx) != 0);
	if (!alloc_failed) {
		nh_priv->nh_idx = new_idx;
		nh_priv->nh_control = ctl;
		CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);
	}
	NHOPS_WUNLOCK(ctl);

	if (alloc_failed) {
		DPRINTF("Unable to allocate nhop index");
		RTSTAT_INC(rts_nh_idx_alloc_failure);
		new_idx = 0;
	} else {
		DPRINTF("Linked nhop priv %p to %d, hash %u, ctl %p", nh_priv, new_idx,
		    hash_priv(nh_priv), ctl);
	}

	consider_resize(ctl, want_buckets, want_items);

	return (new_idx);
}
/*
 * Removes the nexthop described by @nh_priv_del from the hash of @ctl
 * and gives its nexthop index back to the index bitmask.
 *
 * Returns the nexthop that was removed, or NULL if nothing matched.
 * A hash/index resize is attempted afterwards in either case.
 */
struct nhop_priv *
unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del)
{
	struct nhop_priv *removed;
	uint32_t want_buckets, want_items;
	int old_idx;

	old_idx = 0;

	NHOPS_WLOCK(ctl);
	CHT_SLIST_REMOVE_BYOBJ(&ctl->nh_head, nhops, nh_priv_del, removed);
	if (removed != NULL) {
		/* Detach the index from the nexthop before releasing it. */
		old_idx = removed->nh_idx;
		removed->nh_idx = 0;

		KASSERT((old_idx != 0), ("bogus nhop index 0"));
		if ((bitmask_free_idx(&ctl->nh_idx_head, old_idx)) != 0) {
			DPRINTF("Unable to remove index %d from fib %u af %d",
			    old_idx, ctl->ctl_rh->rib_fibnum,
			    ctl->ctl_rh->rib_family);
		}
	}

	/* Sample the resize targets while still holding the lock. */
	want_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
	want_items = bitmask_get_resize_items(&ctl->nh_idx_head);
	NHOPS_WUNLOCK(ctl);

	if (removed != NULL) {
		DPRINTF("Unlinked nhop %p priv idx %d", removed, old_idx);
	} else {
		DPRINTF("Unable to unlink nhop priv %p from hash, hash %u ctl %p",
		    nh_priv_del, hash_priv(nh_priv_del), ctl);
	}

	consider_resize(ctl, want_buckets, want_items);

	return (removed);
}
/*
 * Searches the hash of @ctl for a nexthop matching the data in @nh_priv.
 *
 * Returns the matching nexthop with an extra reference taken, or NULL
 * when there is no match or the match is already on its way out.
 */
struct nhop_priv *
find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv)
{
	struct nhop_priv *match;

	NHOPS_RLOCK(ctl);
	CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, match);
	/* A refcount of 0 means the nexthop is being deleted: ignore it. */
	if (match != NULL &&
	    refcount_acquire_if_not_zero(&match->nh_refcnt) == 0)
		match = NULL;
	NHOPS_RUNLOCK(ctl);

	return (match);
}

229
sys/net/route/nhop.h Normal file
View file

@ -0,0 +1,229 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* This header file contains public definitions for the nexthop routing subsystem.
*/
#ifndef _NET_ROUTE_NHOP_H_
#define _NET_ROUTE_NHOP_H_
#include <netinet/in.h> /* sockaddr_in && sockaddr_in6 */
#include <sys/counter.h>
/*
* Nexthop types. Each value encodes the address family (IPv4/IPv6) and
* whether the nexthop is used without a gateway (_RSLV) or carries a
* pre-calculated ethernet encapsulation (_NHOP). Values start at 1.
*/
enum nhop_type {
NH_TYPE_IPV4_ETHER_RSLV = 1, /* IPv4 ethernet without GW */
NH_TYPE_IPV4_ETHER_NHOP = 2, /* IPv4 with pre-calculated ethernet encap */
NH_TYPE_IPV6_ETHER_RSLV = 3, /* IPv6 ethernet, without GW */
NH_TYPE_IPV6_ETHER_NHOP = 4 /* IPv6 with pre-calculated ethernet encap*/
};
#ifdef _KERNEL
/*
* Define shorter version of AF_LINK sockaddr.
*
* Currently the only use case of AF_LINK gateway is storing
* interface index of the interface of the source IPv6 address.
* This is used by the IPv6 code for the connections over loopback
* interface.
*
* The structure below copies 'struct sockaddr_dl', reducing the
* size of sdl_data buffer, as it is not used. This change
* allows to store the AF_LINK gateways in the nhop gateway itself,
* simplifying control plane handling.
*
* The fields add up to 8 header bytes plus 8 bytes of sdl_data, which is
* smaller than the 28-byte gateway storage of struct nhop_object.
*/
struct sockaddr_dl_short {
u_char sdl_len; /* Total length of sockaddr */
u_char sdl_family; /* AF_LINK */
u_short sdl_index; /* if != 0, system given index for interface */
u_char sdl_type; /* interface type */
u_char sdl_nlen; /* interface name length, no trailing 0 reqd. */
u_char sdl_alen; /* link level address length */
u_char sdl_slen; /* link layer selector length */
char sdl_data[8]; /* unused */
};
/*
* Mask of route (RTF_) flags that are related to nexthops.
* NOTE(review): presumably this is the "mask of rt_flags" that takes part
* in defining a nexthop - confirm against the users of this macro.
*/
#define NHOP_RELATED_FLAGS \
(RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_BLACKHOLE | \
RTF_FIXEDMTU | RTF_LOCAL | RTF_BROADCAST | RTF_MULTICAST)
struct nh_control;
struct nhop_priv;
/*
* Struct 'nhop_object' field description:
*
* nh_flags: NHF_ flags used in the dataplane code. NHF_GATEWAY or NHF_BLACKHOLE
* can be examples of such flags.
* nh_mtu: ready-to-use nexthop mtu. Already accounts for the link-level header,
* interface MTU and protocol-specific limitations.
* nh_prepend_len: link-level prepend length. Currently unused.
* nh_ifp: logical transmit interface. The one from which if_transmit() will be
* called. Guaranteed to be non-NULL.
* nh_aifp: ifnet of the source address. Same as nh_ifp except IPv6 loopback
* routes. See the example below.
* nh_ifa: interface address to use. Guaranteed to be non-NULL.
* nh_pksent: counter(9) reflecting the number of packets transmitted.
*
* gw_: storage suitable to hold AF_INET, AF_INET6 or AF_LINK gateway. More
* details are available in the examples below.
*
* Examples:
*
* Direct routes (routes w/o gateway):
* NHF_GATEWAY is NOT set.
* nh_ifp denotes the logical transmit interface ().
* nh_aifp is the same as nh_ifp
* gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
* Loopback routes:
* NHF_GATEWAY is NOT set.
* nh_ifp points to the loopback interface (lo0).
* nh_aifp points to the interface where the destination address belongs to.
* This is useful in IPv6 link-local-over-loopback communications.
* gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
* GW routes:
* NHF_GATEWAY is set.
* nh_ifp denotes the logical transmit interface.
* nh_aifp is the same as nh_ifp
* gw_sa contains L3 address (either AF_INET or AF_INET6).
*
*
* Note: struct nhop_object fields are ordered in a way that
* supports memcmp-based comparisons.
*
*/
/*
* Offset of nh_pksent inside struct nhop_object, i.e. the number of leading
* bytes (flags, mtu, gateway, ifp, ifa, aifp) that precede the counter.
* The structure is laid out so that this prefix can be compared with memcmp.
*/
#define NHOP_END_CMP (__offsetof(struct nhop_object, nh_pksent))
/*
* Dataplane nexthop object.
*
* The first part (everything before nh_pksent) identifies the nexthop;
* the remainder holds the packet counter, the link-level prepend data
* and a pointer to the control plane data.
*/
struct nhop_object {
uint16_t nh_flags; /* nhop flags */
uint16_t nh_mtu; /* nexthop mtu */
/* Gateway storage: 28 bytes, viewed through the accessor matching its family */
union {
struct sockaddr_in gw4_sa; /* GW accessor as IPv4 */
struct sockaddr_in6 gw6_sa; /* GW accessor as IPv6 */
struct sockaddr gw_sa;
struct sockaddr_dl_short gwl_sa; /* AF_LINK gw (compat) */
char gw_buf[28];
};
struct ifnet *nh_ifp; /* Logical egress interface. Always != NULL */
struct ifaddr *nh_ifa; /* interface address to use. Always != NULL */
struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
counter_u64_t nh_pksent; /* packets sent using this nhop */
/* 32 bytes + 4xPTR == 64(amd64) / 48(i386) */
uint8_t nh_prepend_len; /* length of prepend data */
uint8_t spare[3];
uint32_t spare1; /* alignment */
char nh_prepend[48]; /* L2 prepend */
struct nhop_priv *nh_priv; /* control plane data */
/* -- 128 bytes -- */
};
/*
* Nhop validness.
*
* Currently we verify whether link is up or not on every packet, which can be
* quite costly.
* TODO: subscribe for the interface notifications and update the nexthops
* with NHF_INVALID flag.
*/
#define NH_IS_VALID(_nh) RT_LINK_IS_UP((_nh)->nh_ifp)
/* Non-zero when the NHF_MULTIPATH flag is set on the nexthop. */
#define NH_IS_MULTIPATH(_nh) ((_nh)->nh_flags & NHF_MULTIPATH)
/*
* Gateway sockaddr of the nexthop attached to rtentry _rt. The address of
* the gw4_sa union member is used, which is the start of the gateway
* storage regardless of the actual address family.
*/
#define RT_GATEWAY(_rt) ((struct sockaddr *)&(_rt)->rt_nhop->gw4_sa)
#define RT_GATEWAY_CONST(_rt) ((const struct sockaddr *)&(_rt)->rt_nhop->gw4_sa)
/*
* Releases the nexthop via nhop_free() and clears the caller's pointer.
* _nh is expanded twice and assigned to, so it must be a side-effect-free
* lvalue.
*/
#define NH_FREE(_nh) do { \
nhop_free(_nh); \
/* guard against invalid refs */ \
_nh = NULL; \
} while (0)
void nhop_free(struct nhop_object *nh);
struct sysctl_req;
struct sockaddr_dl;
struct rib_head;
uint32_t nhop_get_idx(const struct nhop_object *nh);
enum nhop_type nhop_get_type(const struct nhop_object *nh);
int nhop_get_rtflags(const struct nhop_object *nh);
int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
#endif /* _KERNEL */
/* Kernel <> userland structures */
/* Structure usage and layout are described in dump_nhop_entry() */
/*
* Nexthop description exported to userland.
* NOTE(review): presumably this is the record returned for the
* NET_RT_NHOPS sysctl - confirm in dump_nhop_entry().
*/
struct nhop_external {
uint32_t nh_len; /* length of the datastructure */
uint32_t nh_idx; /* Nexthop index */
uint32_t nh_fib; /* Fib nexthop is attached to */
uint32_t ifindex; /* transmit interface ifindex */
uint32_t aifindex; /* address ifindex */
uint8_t prepend_len; /* length of the prepend */
uint8_t nh_family; /* address family */
uint16_t nh_type; /* nexthop type */
uint16_t nh_mtu; /* nexthop mtu */
uint16_t nh_flags; /* nhop flags */
struct in_addr nh_addr; /* GW/DST IPv4 address */
struct in_addr nh_src; /* default source IPv4 address */
uint64_t nh_pksent; /* presumably packets sent using this nhop - TODO confirm */
/* control plane */
/* lookup key: address, family, type */
char nh_prepend[64]; /* L2 prepend */
uint64_t nh_refcount; /* number of references */
};
/*
* Header locating the gateway and source sockaddrs by offset.
* NOTE(review): what the offsets are relative to is not visible here -
* see dump_nhop_entry() for the actual layout.
*/
struct nhop_addrs {
uint32_t na_len; /* length of the datastructure */
uint16_t gw_sa_off; /* offset of gateway SA */
uint16_t src_sa_off; /* offset of src address SA */
};
/*
* Userland-visible (nexthop index, weight) pair.
* NOTE(review): presumably one member of a multipath group; no user of
* this structure is visible here - confirm.
*/
struct mpath_nhop_external {
uint32_t nh_idx;
uint32_t nh_weight;
};
/*
* Userland-visible multipath group header.
* NOTE(review): field meanings below are inferred from the names only;
* no user of this structure is visible here - confirm.
*/
struct mpath_external {
uint32_t mp_idx; /* presumably the group index */
uint32_t mp_refcount; /* presumably the number of references */
uint32_t mp_nh_count; /* presumably the number of nexthops in the group */
uint32_t mp_group_size; /* presumably the size of the group - TODO confirm units */
};
#endif

827
sys/net/route/nhop_ctl.c Normal file
View file

@ -0,0 +1,827 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/epoch.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/shared.h>
#include <net/vnet.h>
/*
* This file contains core functionality for the nexthop ("nhop") route subsystem.
* The business logic needed to create nexthop objects is implemented here.
*
* Nexthops in the original sense are the objects containing all the necessary
* information to forward the packet to the selected destination.
* In particular, nexthop is defined by a combination of
* ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
* NHF_DEFAULT
*
* Additionally, each nexthop gets assigned its unique index (nexthop index).
* It serves two purposes: first one is to ease the ability of userland programs to
* reference nexthops by their index. The second one allows lookup algorithms to
* store index instead of pointer (2 bytes vs 8) as a lookup result.
* All nexthops are stored in the resizable hash table.
*
* Basically, this file revolves around supporting the following functions:
* 1) nhop_create_from_info / nhop_create_from_nhop, which contain all
* business logic on filling the nexthop fields based on the provided request.
* 2) get_nhop(), which gets a usable referenced nexthop.
*
* Conventions:
* 1) non-exported functions start with verb
* 2) exported function starts with the subsystem prefix: "nhop"
*/
/* Forward declarations of the file-local helpers defined further below. */
static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w);
static struct nhop_priv *alloc_nhop_structure(void);
static int get_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_priv **pnh_priv);
static int finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
struct nhop_priv *nh_priv);
static struct ifnet *get_aifp(const struct nhop_object *nh, int reference);
static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp);
static void destroy_nhop_epoch(epoch_context_t ctx);
static void destroy_nhop(struct nhop_priv *nh_priv);
static void print_nhop(const char *prefix, const struct nhop_object *nh);
/*
 * Compile-time layout checks for the public nexthop structure:
 * nh_ifp has to be located at byte offset 32 and the whole structure
 * must not grow beyond 128 bytes.
 */
_Static_assert(__offsetof(struct nhop_object, nh_ifp) == 32,
"nhop_object: wrong nh_ifp offset");
_Static_assert(sizeof(struct nhop_object) <= 128,
"nhop_object: size exceeds 128 bytes");
static uma_zone_t nhops_zone; /* Global zone for each and every nexthop */
/*
 * Sizes of the public (nhop_object) and private (nhop_priv) parts of a
 * zone item, each rounded up to a multiple of 2 * CACHE_LINE_SIZE.
 * The private part is placed NHOP_OBJECT_ALIGNED_SIZE bytes after the
 * start of the item (see alloc_nhop_structure()).
 */
#define NHOP_OBJECT_ALIGNED_SIZE roundup2(sizeof(struct nhop_object), \
2 * CACHE_LINE_SIZE)
#define NHOP_PRIV_ALIGNED_SIZE roundup2(sizeof(struct nhop_priv), \
2 * CACHE_LINE_SIZE)
/*
 * Creates the global UMA zone used for all nexthop allocations.
 * A single zone item holds the aligned public nhop_object followed by
 * the aligned private nhop_priv.
 */
void
nhops_init(void)
{
	size_t item_size;

	item_size = NHOP_OBJECT_ALIGNED_SIZE + NHOP_PRIV_ALIGNED_SIZE;
	nhops_zone = uma_zcreate("routing nhops", item_size,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
/*
 * Returns the interface owning the source address used by the route.
 *
 * Normally this is the transmit interface (nh_ifp). The exception is a
 * nexthop whose transmit interface is a loopback one and whose gateway
 * is an AF_LINK sockaddr: there the interface is looked up by the index
 * stored in the gateway, so the "real" interface of the address is
 * reported instead of loopback. This is needed to support link-local
 * IPv6 loopback communications.
 *
 * If @reference is non-zero, the returned ifp is referenced.
 *
 * Falls back to nh_ifp when the index lookup fails.
 */
static struct ifnet *
get_aifp(const struct nhop_object *nh, int reference)
{
	struct ifnet *ifp;

	if ((nh->nh_ifp->if_flags & IFF_LOOPBACK) != 0 &&
	    nh->gw_sa.sa_family == AF_LINK) {
		/* Looped-back route: resolve the original interface */
		ifp = reference ?
		    ifnet_byindex_ref(nh->gwl_sa.sdl_index) :
		    ifnet_byindex(nh->gwl_sa.sdl_index);
		if (ifp != NULL)
			return (ifp);
		DPRINTF("unable to get aifp for %s index %d",
			if_name(nh->nh_ifp), nh->gwl_sa.sdl_index);
	}

	/* Common case (or failed lookup): use the transmit interface */
	ifp = nh->nh_ifp;
	if (reference)
		if_ref(ifp);

	return (ifp);
}
/*
 * Nexthop equality predicate used by the nexthop hash table.
 *
 * Two nexthops are equal when the first NHOP_END_CMP bytes of their
 * public parts match and both nh_type and nh_family are the same.
 *
 * Returns 1 if equal, 0 otherwise.
 */
int
cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two)
{

	return (memcmp(_one->nh, _two->nh, NHOP_END_CMP) == 0 &&
	    _one->nh_type == _two->nh_type &&
	    _one->nh_family == _two->nh_family);
}
/*
 * Applies the MTU from @info to @nh, but only if the request carries
 * one (RTV_MTU set in rti_mflags).
 *
 * A non-zero MTU is treated as explicitly configured and marks the
 * nexthop with RTF_FIXEDMTU; a zero MTU clears the flag, i.e. rolls
 * back to the default.
 */
static void
set_nhop_mtu_from_info(struct nhop_object *nh, const struct rt_addrinfo *info)
{

	if ((info->rti_mflags & RTV_MTU) == 0)
		return;

	if (info->rti_rmx->rmx_mtu == 0) {
		/* User explicitly sets MTU to 0: assume rollback to default */
		nh->nh_priv->rt_flags &= ~RTF_FIXEDMTU;
	} else {
		/* MTU was explicitly provided by user: keep it */
		nh->nh_priv->rt_flags |= RTF_FIXEDMTU;
	}
	nh->nh_mtu = info->rti_rmx->rmx_mtu;
}
/*
 * Fills in the shortened link-level sockaddr version suitable to be
 * stored inside the nexthop gateway buffer.
 * Only family, length, interface index and interface type are set.
 */
static void
fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp)
{

	sdl->sdl_len = sizeof(struct sockaddr_dl_short);
	sdl->sdl_family = AF_LINK;
	sdl->sdl_type = ifp->if_type;
	sdl->sdl_index = ifp->if_index;
}
/*
 * Stores the gateway described by @info inside @nh.
 *
 * For gateway routes (RTF_GATEWAY) the gateway sockaddr is copied
 * verbatim; sockaddrs longer than struct sockaddr_in6 are rejected.
 * For interface routes a short AF_LINK sockaddr describing nh_ifp is
 * written instead.
 *
 * Returns 0 on success, ENOMEM if the gateway sockaddr is too large.
 */
static int
set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
{
	struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	if ((info->rti_flags & RTF_GATEWAY) == 0) {
		/*
		 * Interface route. Currently the route.c code adds
		 * sa of type AF_LINK, which is 56 bytes long. The only
		 * meaningful data there is the interface index. It is
		 * used in the IPv6 loopback output, where we need to
		 * preserve the original interface to maintain proper
		 * scoping. Despite the fact that nexthop code stores the
		 * original interface in a separate field (nh_aifp), write
		 * an AF_LINK compatible sa with shorter total length.
		 */
		fill_sdl_from_ifp(&nh->gwl_sa, nh->nh_ifp);
		return (0);
	}

	if (gw->sa_len > sizeof(struct sockaddr_in6)) {
		DPRINTF("nhop SA size too big: AF %d len %u",
			gw->sa_family, gw->sa_len);
		return (ENOMEM);
	}
	memcpy(&nh->gw_sa, gw, gw->sa_len);

	return (0);
}
/*
 * Fills the freshly allocated nexthop behind @nh_priv with the data
 * from the route request @info: flags, address family, MTU, interface,
 * interface address and gateway.
 *
 * The nexthop type is left as 0 here; setting it is the responsibility
 * of the per-address-family pre-add hook, which also sets some of the
 * remaining data.
 *
 * Returns 0 on success or the errno from set_nhop_gw_from_info().
 */
static int
fill_nhop_from_info(struct nhop_priv *nh_priv, struct rt_addrinfo *info)
{
	struct nhop_object *nh = nh_priv->nh;
	int rt_flags = info->rti_flags & NHOP_RT_FLAG_MASK;

	/* Control plane data */
	nh->nh_priv->rt_flags = rt_flags;
	nh_priv->nh_family = info->rti_info[RTAX_DST]->sa_family;
	nh_priv->nh_type = 0; /* hook responsibility to set nhop type */

	/* Data plane data */
	nh->nh_flags = fib_rte_to_nh_flags(rt_flags);
	set_nhop_mtu_from_info(nh, info);
	nh->nh_ifp = info->rti_ifa->ifa_ifp;
	nh->nh_ifa = info->rti_ifa;
	nh->nh_aifp = get_aifp(nh, 0);

	return (set_nhop_gw_from_info(nh, info));
}
/*
 * Creates a new nexthop based on the information in @info.
 *
 * The nexthop is allocated, filled from @info and then passed to
 * get_nhop(), which either returns an already-linked equal nexthop or
 * links the new one.
 *
 * Returns:
 * 0 on success, filling @nh_ret with the desired nexthop object ptr
 * ENOMEM if the nexthop structure cannot be allocated
 * errno otherwise
 *
 * @nh_ret is left untouched on failure.
 */
int
nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object **nh_ret)
{
	struct nhop_priv *nh_priv;
	int error;

	NET_EPOCH_ASSERT();

	/*
	 * The allocation is done with M_NOWAIT and thus can fail;
	 * bail out instead of dereferencing NULL below.
	 */
	nh_priv = alloc_nhop_structure();
	if (nh_priv == NULL)
		return (ENOMEM);

	error = fill_nhop_from_info(nh_priv, info);
	if (error != 0) {
		/* Nothing is referenced yet, plain free is enough */
		uma_zfree(nhops_zone, nh_priv->nh);
		return (error);
	}

	/* get_nhop() frees the nexthop on its own in case of error */
	error = get_nhop(rnh, info, &nh_priv);
	if (error == 0)
		*nh_ret = nh_priv->nh;

	return (error);
}
/*
* Gets linked nhop using the provided @pnh_priv nexhop data.
* If linked nhop is found, returns it, freeing the provided one.
* If there is no such nexthop, attaches the remaining data to the
* provided nexthop and links it.
*
* Returns 0 on success, storing referenced nexthop in @pnh_priv.
* Otherwise, errno is returned.
*/
static int
get_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_priv **pnh_priv)
{
const struct sockaddr *dst, *gateway, *netmask;
struct nhop_priv *nh_priv, *tmp_priv;
int error;
nh_priv = *pnh_priv;
/* Give the protocols chance to augment the request data */
dst = info->rti_info[RTAX_DST];
netmask = info->rti_info[RTAX_NETMASK];
gateway = info->rti_info[RTAX_GATEWAY];
error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, nh_priv->nh);
if (error != 0) {
uma_zfree(nhops_zone, nh_priv->nh);
return (error);
}
tmp_priv = find_nhop(rnh->nh_control, nh_priv);
if (tmp_priv != NULL) {
uma_zfree(nhops_zone, nh_priv->nh);
*pnh_priv = tmp_priv;
return (0);
}
/*
* Existing nexthop not found, need to create new one.
* Note: multiple simultaneous get_nhop() requests
* can result in multiple equal nexhops existing in the
* nexthop table. This is not a not a problem until the
* relative number of such nexthops is significant, which
* is extremely unlikely.
*/
error = finalize_nhop(rnh->nh_control, info, nh_priv);
if (error != 0)
return (error);
return (0);
}
/*
* Update @nh with data supplied in @info.
* This is a helper function to support route changes.
*
* It limits the changes that can be done to the route to the following:
* 1) all combination of gateway changes (gw, interface, blackhole/reject)
* 2) route flags (FLAG[123],STATIC,BLACKHOLE,REJECT)
* 3) route MTU
*
* @nh is expected to be a not-yet-linked nexthop (the only caller visible
* here, nhop_create_from_nhop(), passes a fresh copy).
*
* Returns:
* 0 on success
* errno from set_nhop_gw_from_info() if the new gateway cannot be stored
*/
static int
alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
{
struct sockaddr *info_gw;
int error;
/* Update MTU if set in the request */
set_nhop_mtu_from_info(nh, info);
/* XXX: allow only one of BLACKHOLE,REJECT,GATEWAY */
/* Allow some flags (FLAG1,STATIC,BLACKHOLE,REJECT) to be toggled on change. */
nh->nh_priv->rt_flags &= ~RTF_FMASK;
nh->nh_priv->rt_flags |= info->rti_flags & RTF_FMASK;
/* Consider gateway change */
info_gw = info->rti_info[RTAX_GATEWAY];
if (info_gw != NULL) {
/*
 * NOTE(review): for interface routes the AF_LINK gateway is built from
 * nh->nh_ifp, which at this point still holds the original interface;
 * info->rti_ifp is applied only further below - confirm this is intended.
 */
error = set_nhop_gw_from_info(nh, info);
if (error != 0)
return (error);
/* Update RTF_GATEWAY flag status */
nh->nh_priv->rt_flags &= ~RTF_GATEWAY;
nh->nh_priv->rt_flags |= (RTF_GATEWAY & info->rti_flags);
}
/* Update datapath flags */
nh->nh_flags = fib_rte_to_nh_flags(nh->nh_priv->rt_flags);
/* Override interface address / interface only when supplied */
if (info->rti_ifa != NULL)
nh->nh_ifa = info->rti_ifa;
if (info->rti_ifp != NULL)
nh->nh_ifp = info->rti_ifp;
/* Recalculate (unreferenced) aifp based on the new ifp/gateway */
nh->nh_aifp = get_aifp(nh, 0);
return (0);
}
/*
 * Creates new nexthop based on @nh_orig and augmentation data from @info.
 * Helper function used in the route changes, please see
 * alter_nhop_from_info() comments for more details.
 *
 * The new nexthop starts as a copy of @nh_orig, gets altered by @info
 * and is then passed to get_nhop(), which either returns an
 * already-linked equal nexthop or links the new one.
 *
 * Returns:
 * 0 on success, filling @pnh with the desired nexthop object
 * ENOMEM if the nexthop structure cannot be allocated
 * errno otherwise
 *
 * @pnh is left untouched on failure.
 */
int
nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig,
    struct rt_addrinfo *info, struct nhop_object **pnh)
{
	struct nhop_priv *nh_priv;
	struct nhop_object *nh;
	int error;

	NET_EPOCH_ASSERT();

	/*
	 * The allocation is done with M_NOWAIT and thus can fail;
	 * bail out instead of dereferencing NULL below.
	 */
	nh_priv = alloc_nhop_structure();
	if (nh_priv == NULL)
		return (ENOMEM);
	nh = nh_priv->nh;

	/* Start with copying data from original nexthop */
	nh_priv->nh_family = nh_orig->nh_priv->nh_family;
	nh_priv->rt_flags = nh_orig->nh_priv->rt_flags;
	nh_priv->nh_type = nh_orig->nh_priv->nh_type;

	nh->nh_ifp = nh_orig->nh_ifp;
	nh->nh_ifa = nh_orig->nh_ifa;
	nh->nh_aifp = nh_orig->nh_aifp;
	nh->nh_mtu = nh_orig->nh_mtu;
	nh->nh_flags = nh_orig->nh_flags;
	memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len);

	/* Apply the requested changes on top of the copy */
	error = alter_nhop_from_info(nh, info);
	if (error != 0) {
		/* Nothing is referenced yet, plain free is enough */
		uma_zfree(nhops_zone, nh_priv->nh);
		return (error);
	}

	/* get_nhop() frees the nexthop on its own in case of error */
	error = get_nhop(rnh, info, &nh_priv);
	if (error == 0)
		*pnh = nh_priv->nh;

	return (error);
}
/*
 * Allocates zeroed memory for the public/private nexthop structures
 * from the nexthop zone without sleeping and cross-links both parts.
 * The private part lives NHOP_OBJECT_ALIGNED_SIZE bytes after the
 * public one within the same zone item.
 *
 * Returns pointer to nhop_priv or NULL if the allocation failed.
 */
static struct nhop_priv *
alloc_nhop_structure(void)
{
	struct nhop_object *nh;

	nh = uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO);
	if (nh == NULL)
		return (NULL);

	nh->nh_priv = (struct nhop_priv *)((char *)nh +
	    NHOP_OBJECT_ALIGNED_SIZE);
	nh->nh_priv->nh = nh;

	return (nh->nh_priv);
}
/*
* Allocates/references the remaining bits of nexthop data and links
* it to the hash table.
*
* On success the nexthop holds references on nh_ifp, nh_ifa and nh_aifp,
* has its refcount set to 1 and nh_linked set to 2.
*
* Returns 0 if successful,
* errno otherwise. @nh_priv is freed in case of error.
*/
static int
finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
struct nhop_priv *nh_priv)
{
struct nhop_object *nh;
nh = nh_priv->nh;
/* Allocate per-cpu packet counter */
nh->nh_pksent = counter_u64_alloc(M_NOWAIT);
if (nh->nh_pksent == NULL) {
/* No references taken yet: plain free is sufficient */
uma_zfree(nhops_zone, nh);
RTSTAT_INC(rts_nh_alloc_failure);
DPRINTF("nh_alloc_finalize failed");
return (ENOMEM);
}
/* Reference external objects and calculate (referenced) aifp */
if_ref(nh->nh_ifp);
ifa_ref(nh->nh_ifa);
nh->nh_aifp = get_aifp(nh, 1);
DPRINTF("AIFP: %p nh_ifp %p", nh->nh_aifp, nh->nh_ifp);
refcount_init(&nh_priv->nh_refcnt, 1);
/* Please see nhop_free() comments on the initial value */
refcount_init(&nh_priv->nh_linked, 2);
print_nhop("FINALIZE", nh);
/* link_nhop() returning 0 is treated as a failure to link */
if (link_nhop(ctl, nh_priv) == 0) {
/*
* Adding nexthop to the datastructures
* failed. Call destructor w/o waiting for
* the epoch end, as nexthop is not used
* and return.
*/
DPRINTF("link_nhop failed!");
destroy_nhop(nh_priv);
return (ENOBUFS);
}
return (0);
}
/*
 * Writes a printable representation of @sa into @buf of size @buflen:
 * the address for AF_INET/AF_INET6, "if#<index>" for AF_LINK and
 * "af:<family>" for everything else.
 */
static void
print_nhop_sa(char *buf, size_t buflen, const struct sockaddr *sa)
{

	switch (sa->sa_family) {
	case AF_INET:
		inet_ntop(AF_INET,
		    &((const struct sockaddr_in *)sa)->sin_addr, buf, buflen);
		break;
	case AF_INET6:
		inet_ntop(AF_INET6,
		    &((const struct sockaddr_in6 *)sa)->sin6_addr, buf, buflen);
		break;
	case AF_LINK:
		snprintf(buf, buflen, "if#%d",
		    ((const struct sockaddr_dl *)sa)->sdl_index);
		break;
	default:
		snprintf(buf, buflen, "af:%d", sa->sa_family);
		break;
	}
}
/*
 * Debug helper: prints the main nexthop fields (family, interfaces,
 * gateway and source addresses, mtu, flags) via DPRINTF(), prepending
 * the output with @prefix.
 */
static void
print_nhop(const char *prefix, const struct nhop_object *nh)
{
	char gw_str[INET6_ADDRSTRLEN];
	char ifa_str[INET6_ADDRSTRLEN];

	print_nhop_sa(ifa_str, sizeof(ifa_str), nh->nh_ifa->ifa_addr);
	print_nhop_sa(gw_str, sizeof(gw_str), &nh->gw_sa);

	DPRINTF("%s nhop priv %p: AF %d ifp %p %s addr %s src %p %s aifp %p %s mtu %d nh_flags %X",
	    prefix, nh->nh_priv, nh->nh_priv->nh_family, nh->nh_ifp,
	    if_name(nh->nh_ifp), gw_str, nh->nh_ifa, ifa_str, nh->nh_aifp,
	    if_name(nh->nh_aifp), nh->nh_mtu, nh->nh_flags);
}
/*
 * Releases everything a finalized nexthop holds (interface, aifp and
 * interface address references, packet counter) and returns the
 * nexthop memory to the zone.
 */
static void
destroy_nhop(struct nhop_priv *nh_priv)
{
	struct nhop_object *nh = nh_priv->nh;

	print_nhop("DEL", nh);

	/* Drop references taken in finalize_nhop() */
	if_rele(nh->nh_ifp);
	if_rele(nh->nh_aifp);
	ifa_free(nh->nh_ifa);

	counter_u64_free(nh->nh_pksent);

	uma_zfree(nhops_zone, nh);
}
/*
 * Epoch callback indicating nhop is safe to destroy.
 * @ctx is the nh_epoch_ctx member embedded in the nexthop private data.
 */
static void
destroy_nhop_epoch(epoch_context_t ctx)
{

	destroy_nhop(__containerof(ctx, struct nhop_priv, nh_epoch_ctx));
}
int
nhop_ref_object(struct nhop_object *nh)
{
return (refcount_acquire_if_not_zero(&nh->nh_priv->nh_refcnt));
}
/*
 * Drops a reference on @nh.
 * When the last reference goes away, the nexthop is unlinked from its
 * nh_control (unless that has already been done by the rib destruction)
 * and its destruction is scheduled via an epoch callback.
 */
void
nhop_free(struct nhop_object *nh)
{
struct nh_control *ctl;
struct nhop_priv *nh_priv = nh->nh_priv;
struct epoch_tracker et;
/* Nothing more to do unless this was the last reference */
if (!refcount_release(&nh_priv->nh_refcnt))
return;
/*
* There are only 2 places, where nh_linked can be decreased:
* rib destroy (nhops_destroy_rib) and this function.
* nh_linked can never be increased.
*
* Hence, use initial value of 2 to make use of
* refcount_release_if_not_last().
*
* There can be two scenarios when calling this function:
*
* 1) nh_linked value is 2. This means that either
* nhops_destroy_rib() has not been called OR it is running,
* but we are guaranteed that nh_control won't be freed in
* this epoch. Hence, nexthop can be safely unlinked.
*
* 2) nh_linked value is 1. In that case, nhops_destroy_rib()
* has been called and nhop unlink can be skipped.
*/
NET_EPOCH_ENTER(et);
if (refcount_release_if_not_last(&nh_priv->nh_linked)) {
ctl = nh_priv->nh_control;
if (unlink_nhop(ctl, nh_priv) == NULL) {
/*
 * Do not try to reclaim.
 * The nexthop memory is intentionally not released on this path.
 */
DPRINTF("Failed to unlink nexhop %p", nh_priv);
NET_EPOCH_EXIT(et);
return;
}
}
NET_EPOCH_EXIT(et);
/* Defer the actual destruction until current epoch readers are done */
epoch_call(net_epoch_preempt, destroy_nhop_epoch,
&nh_priv->nh_epoch_ctx);
}
/*
 * Tries to reference @nh. Currently a plain wrapper around
 * nhop_ref_object() and returns its result.
 */
int
nhop_ref_any(struct nhop_object *nh)
{
return (nhop_ref_object(nh));
}
/*
 * Releases a reference on @nh. Currently a plain wrapper around
 * nhop_free().
 */
void
nhop_free_any(struct nhop_object *nh)
{
nhop_free(nh);
}
/* Helper functions */
/*
 * Returns the nexthop index stored in the private part of @nh.
 */
uint32_t
nhop_get_idx(const struct nhop_object *nh)
{
return (nh->nh_priv->nh_idx);
}
/*
 * Returns the nexthop type stored in the private part of @nh.
 */
enum nhop_type
nhop_get_type(const struct nhop_object *nh)
{
return (nh->nh_priv->nh_type);
}
/*
 * Sets the nexthop type of @nh to @nh_type.
 * No locking is performed here.
 */
void
nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type)
{
nh->nh_priv->nh_type = nh_type;
}
/*
 * Returns the control plane routing flags (rt_flags) of @nh.
 */
int
nhop_get_rtflags(const struct nhop_object *nh)
{
return (nh->nh_priv->rt_flags);
}
/*
 * Replaces the control plane routing flags of @nh with @rt_flags.
 * The datapath flags (nh_flags) are not recalculated here.
 */
void
nhop_set_rtflags(struct nhop_object *nh, int rt_flags)
{
nh->nh_priv->rt_flags = rt_flags;
}
/*
 * Propagates the new interface MTU @mtu to all nexthops of @rh that
 * transmit via @ifp.
 *
 * Nexthops with a user-fixed MTU (RTF_FIXEDMTU) are only touched when
 * their MTU is larger than the new interface MTU.
 */
void
nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhop_priv *nh_priv;
	struct nhop_object *nh;

	NHOPS_WLOCK(ctl);
	CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
		nh = nh_priv->nh;
		if (nh->nh_ifp != ifp)
			continue;
		/* Keep a fixed MTU as long as it still fits */
		if ((nh_priv->rt_flags & RTF_FIXEDMTU) != 0 &&
		    nh->nh_mtu <= mtu)
			continue;
		/* Update MTU directly */
		nh->nh_mtu = mtu;
	} CHT_SLIST_FOREACH_END;
	NHOPS_WUNLOCK(ctl);
}
/*
* Dumps a single entry to sysctl buffer.
*
* Layout:
* rt_msghdr - generic RTM header to allow users to skip non-understood messages
* nhop_external - nexthop description structure (with length)
* nhop_addrs - structure encapsulating GW/SRC sockaddrs
*
* The fixed part above is followed by the gateway sockaddr and then by
* the source (interface address) sockaddr; their offsets relative to
* nhop_addrs are recorded in gw_sa_off / src_sa_off.
*
* Returns 0 on success or the error reported by SYSCTL_OUT().
*/
static int
dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w)
{
struct {
struct rt_msghdr rtm;
struct nhop_external nhe;
struct nhop_addrs na;
} arpc;
struct nhop_external *pnhe;
struct sockaddr *gw_sa, *src_sa;
struct sockaddr_storage ss;
size_t addrs_len;
int error;
//DPRINTF("Dumping: head %p nh %p flags %X req %p\n", rh, nh, nh->nh_flags, w);
memset(&arpc, 0, sizeof(arpc));
/* Fixed-size part only; sockaddr lengths are added further below */
arpc.rtm.rtm_msglen = sizeof(arpc);
arpc.rtm.rtm_version = RTM_VERSION;
arpc.rtm.rtm_type = RTM_GET;
//arpc.rtm.rtm_flags = RTF_UP;
arpc.rtm.rtm_flags = nh->nh_priv->rt_flags;
/* nhop_external */
pnhe = &arpc.nhe;
pnhe->nh_len = sizeof(struct nhop_external);
pnhe->nh_idx = nh->nh_priv->nh_idx;
pnhe->nh_fib = rh->rib_fibnum;
pnhe->ifindex = nh->nh_ifp->if_index;
pnhe->aifindex = nh->nh_aifp->if_index;
pnhe->nh_family = nh->nh_priv->nh_family;
pnhe->nh_type = nh->nh_priv->nh_type;
pnhe->nh_mtu = nh->nh_mtu;
pnhe->nh_flags = nh->nh_flags;
memcpy(pnhe->nh_prepend, nh->nh_prepend, sizeof(nh->nh_prepend));
pnhe->prepend_len = nh->nh_prepend_len;
pnhe->nh_refcount = nh->nh_priv->nh_refcnt;
pnhe->nh_pksent = counter_u64_fetch(nh->nh_pksent);
/* sockaddr container */
addrs_len = sizeof(struct nhop_addrs);
arpc.na.gw_sa_off = addrs_len;
gw_sa = (struct sockaddr *)&nh->gw4_sa;
addrs_len += gw_sa->sa_len;
src_sa = nh->nh_ifa->ifa_addr;
if (src_sa->sa_family == AF_LINK) {
/* Shorten structure */
memset(&ss, 0, sizeof(struct sockaddr_storage));
fill_sdl_from_ifp((struct sockaddr_dl_short *)&ss,
nh->nh_ifa->ifa_ifp);
src_sa = (struct sockaddr *)&ss;
}
arpc.na.src_sa_off = addrs_len;
addrs_len += src_sa->sa_len;
/* Write total container length */
arpc.na.na_len = addrs_len;
/* Account for the two variable-length sockaddrs in the message length */
arpc.rtm.rtm_msglen += arpc.na.na_len - sizeof(struct nhop_addrs);
/* Emit the fixed part, then gateway, then source; stop on first error */
error = SYSCTL_OUT(w, &arpc, sizeof(arpc));
if (error == 0)
error = SYSCTL_OUT(w, gw_sa, gw_sa->sa_len);
if (error == 0)
error = SYSCTL_OUT(w, src_sa, src_sa->sa_len);
return (error);
}
/*
 * Dumps all nexthops of @rh to the sysctl request @w, one
 * dump_nhop_entry() record per nexthop. The nexthop table is
 * read-locked for the duration of the dump.
 *
 * Returns 0 on success or the first error reported while dumping.
 */
int
nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhop_priv *cur;
	int rc;

	NHOPS_RLOCK(ctl);
	DPRINTF("NHDUMP: count=%u", ctl->nh_head.items_count);
	CHT_SLIST_FOREACH(&ctl->nh_head, nhops, cur) {
		rc = dump_nhop_entry(rh, cur->nh, w);
		if (rc != 0) {
			/* Abort the dump on the first failure */
			NHOPS_RUNLOCK(ctl);
			return (rc);
		}
	} CHT_SLIST_FOREACH_END;
	NHOPS_RUNLOCK(ctl);

	return (0);
}

219
sys/net/route/nhop_utils.c Normal file
View file

@ -0,0 +1,219 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_route.h"
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <net/route/nhop_utils.h>
/*
 * The index bitmask is an array of u_long "blocks", one bit per item.
 * A set bit means the item is free (see _bitmask_init_idx()).
 */
#define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */
/* Size in bytes of @_blocks blocks */
#define _BLOCKS_TO_SZ(_blocks) ((size_t)(_blocks) * sizeof(u_long))
/* Number of items tracked by @_blocks blocks */
#define _BLOCKS_TO_ITEMS(_blocks) ((uint32_t)(_blocks) * BLOCK_ITEMS)
/* Number of whole blocks needed for @_items items (rounds down) */
#define _ITEMS_TO_BLOCKS(_items) ((_items) / BLOCK_ITEMS)
static void _bitmask_init_idx(void *index, uint32_t items);
/*
 * Initializes bitmask head @bh to track @num_items items using the
 * caller-provided storage @idx.
 * If @idx is not NULL, the storage is initialized with all items
 * (except index 0) marked as free.
 */
void
bitmask_init(struct bitmask_head *bh, void *idx, uint32_t num_items)
{

	if (idx != NULL)
		_bitmask_init_idx(idx, num_items);

	memset(bh, 0, sizeof(*bh));
	bh->idx = (u_long *)idx;
	bh->blocks = _ITEMS_TO_BLOCKS(num_items);
}
/*
 * Calculates the new capacity (in items) the bitmask should be grown to.
 * Growth (doubling) is requested once more than half of the current
 * capacity is in use, as long as fewer than 65536 items are allocated.
 *
 * Returns the new number of items or 0 if no resize is needed.
 */
uint32_t
bitmask_get_resize_items(const struct bitmask_head *bh)
{
	size_t capacity = _BLOCKS_TO_ITEMS(bh->blocks);

	if (bh->items_count * 2 <= capacity)
		return (0);
	if (bh->items_count >= 65536)
		return (0);

	return (capacity * 2);
}
/*
 * Returns non-zero if the bitmask should be grown, i.e. when
 * bitmask_get_resize_items() suggests a new size.
 */
int
bitmask_should_resize(const struct bitmask_head *bh)
{
return (bitmask_get_resize_items(bh) != 0);
}
#if 0
/*
 * Disabled (compiled out) helper: number of blocks needed for @items.
 * Performs the same calculation as the _ITEMS_TO_BLOCKS() macro.
 */
uint32_t
_bitmask_get_blocks(uint32_t items)
{
return (items / BLOCK_ITEMS);
}
#endif
/*
 * Returns the number of bytes required to store a bitmask for @items
 * items (one bit per item).
 * @items has to be a multiple of BLOCK_ITEMS; this is asserted.
 *
 * NOTE(review): the KASSERT message talks about a power of 2, while the
 * condition checked is "multiple of BLOCK_ITEMS".
 */
size_t
bitmask_get_size(uint32_t items)
{
#if _KERNEL
KASSERT((items % BLOCK_ITEMS) == 0,
("bitmask size needs to power of 2 and greater or equal to %zu",
BLOCK_ITEMS));
#else
assert((items % BLOCK_ITEMS) == 0);
#endif
return (items / 8);
}
/*
 * Initializes bitmask storage @_idx sized for @items items:
 * all bits are set (item is free) except bit 0 of the first block,
 * so that index 0 is never handed out.
 */
static void
_bitmask_init_idx(void *_idx, uint32_t items)
{
	u_long *first_block = (u_long *)_idx;

	/* Mark all as free */
	memset(_idx, 0xFF, bitmask_get_size(items));
	/* Always skip index 0 */
	*first_block &= ~(u_long)1;
}
/*
 * Prepares the new bitmask storage @new_idx, sized for @new_items items,
 * and copies the allocation state of @bi into it.
 *
 * @new_idx is always (re)initialized as "all free except index 0".
 * The existing blocks are copied only when they fit into the new
 * storage; shrinking is not supported (a _try_merge-style API would be
 * needed to allow that).
 *
 * Returns 0 on success, 1 if the new storage is smaller than the
 * current one (nothing is copied in that case).
 */
int
bitmask_copy(const struct bitmask_head *bi, void *new_idx, uint32_t new_items)
{
	/*
	 * Number of blocks available in the new storage. Note this is
	 * an items -> blocks conversion: using the opposite conversion
	 * makes the size check below pass for nearly any input and
	 * lets memcpy() write past the end of a smaller @new_idx.
	 */
	uint32_t new_blocks = _ITEMS_TO_BLOCKS(new_items);

	_bitmask_init_idx(new_idx, new_items);

	if (bi->blocks > new_blocks) {
		/* XXX: shrinking would require all cut-off blocks to be free */
		return (1);
	}

	/* Carry over the current blocks; the remainder stays free */
	if (bi->blocks > 0)
		memcpy(new_idx, bi->idx, _BLOCKS_TO_SZ(bi->blocks));

	return (0);
}
/*
 * Switches @bh to the new storage @new_idx sized for @new_items items.
 * The previous storage pointer is handed back via @pidx (if not NULL),
 * so the caller can dispose of it.
 */
void
bitmask_swap(struct bitmask_head *bh, void *new_idx, uint32_t new_items, void **pidx)
{
	void *prev_idx = bh->idx;

	bh->blocks = _ITEMS_TO_BLOCKS(new_items);
	bh->idx = (u_long *)new_idx;

	if (pidx != NULL)
		*pidx = prev_idx;
}
/*
 * Allocates a new index in the given instance and stores it in @pidx.
 * The search starts at the first potentially free block (free_off) and
 * picks the lowest free bit found.
 *
 * Returns 0 on success, 1 if there are no free indexes left.
 */
int
bitmask_alloc_idx(struct bitmask_head *bi, uint16_t *pidx)
{
	int blk, bit, v;

	for (blk = bi->free_off; blk < bi->blocks; blk++) {
		bit = ffsl(bi->idx[blk]);
		if (bit != 0) {
			/* Mark as busy */
			bi->idx[blk] &= ~((u_long)1 << (bit - 1));
			bi->free_off = blk;
			v = BLOCK_ITEMS * blk + bit - 1;
			*pidx = v;
			bi->items_count++;
			return (0);
		}
	}

	return (1);
}
/*
 * Returns index @idx back to the given set.
 *
 * Returns 0 on success, 1 if @idx is 0 (reserved), is outside of the
 * bitmask or is already marked as free.
 */
int
bitmask_free_idx(struct bitmask_head *bi, uint16_t idx)
{
	u_long bit;
	int blk;

	if (idx == 0)
		return (1);

	blk = idx / BLOCK_ITEMS;
	if (blk >= bi->blocks)
		return (1);

	bit = (u_long)1 << (idx % BLOCK_ITEMS);
	if ((bi->idx[blk] & bit) != 0)
		return (1);

	/* Mark as free */
	bi->idx[blk] |= bit;
	bi->items_count--;

	/* Update free offset */
	if (bi->free_off > blk)
		bi->free_off = blk;

	return (0);
}

200
sys/net/route/nhop_utils.h Normal file
View file

@ -0,0 +1,200 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NET_ROUTE_NHOP_UTILS_H_
#define _NET_ROUTE_NHOP_UTILS_H_
/*
 * Chained hash table.
 * Generic (type-erased) view of the table head; the per-type heads
 * produced by CHT_SLIST_DEFINE() share this layout.
 */
struct _cht_head {
	uint32_t hash_size;
	uint32_t items_count;
	void **ptr;
};

/*
 * Calculates the number of buckets the table should be resized to:
 * doubles when more than half of the buckets worth of items are stored
 * (while below 65536 buckets), halves when fewer than a quarter are
 * stored (while above 16 buckets).
 *
 * Returns the new number of buckets or 0 if no resize is needed.
 */
static inline uint32_t
_cht_get_resize_size(const struct _cht_head *head)
{
	const uint32_t buckets = head->hash_size;
	const uint32_t items = head->items_count;

	if (items * 2 > buckets && buckets < 65536)
		return (buckets * 2);
	if (items * 4 < buckets && buckets > 16)
		return (buckets / 2);

	return (0);
}

/*
 * Returns non-zero if the table should be resized.
 */
static inline int
_cht_need_resize(const struct _cht_head *head)
{

	return (_cht_get_resize_size(head) != 0);
}
/* Fall back to __typeof when typeof is not provided as a macro */
#ifndef typeof
#define typeof __typeof
#endif
/* Returns non-zero if the table behind @_head should be resized */
#define CHT_SLIST_NEED_RESIZE(_head) \
_cht_need_resize((const struct _cht_head *)(_head))
/* Returns the suggested new number of buckets (0 if no resize is needed) */
#define CHT_SLIST_GET_RESIZE_BUCKETS(_head) \
_cht_get_resize_size((const struct _cht_head *)(_head))
/* Returns the size in bytes of a bucket array with @_buckets buckets */
#define CHT_SLIST_GET_RESIZE_SIZE(_buckets) ((_buckets) * sizeof(void *))
/*
 * Defines the typed table head "struct <_HNAME>_head" for items of
 * type @_ITEM_TYPE. The layout mirrors struct _cht_head.
 */
#define CHT_SLIST_DEFINE(_HNAME, _ITEM_TYPE) \
struct _HNAME##_head { \
uint32_t hash_size; \
uint32_t items_count; \
_ITEM_TYPE **ptr; \
}
/*
 * Initializes @_head as an empty table using the bucket array @_ptr of
 * @_num_buckets buckets.
 * Note: expands to multiple statements, so it must not be used as the
 * sole body of an unbraced if/else/loop.
 */
#define CHT_SLIST_INIT(_head, _ptr, _num_buckets) \
(_head)->hash_size = _num_buckets; \
(_head)->items_count = 0; \
(_head)->ptr = _ptr;
/*
 * Default hash method for constant-size keys.
 *
 * Both macros map the hash value provided by <_PX>_hash_key() /
 * <_PX>_hash_obj() to a bucket number by masking it with
 * (hash_size - 1); this assumes hash_size is a power of two.
 * The expansion is fully parenthesized so the macros can be safely
 * used inside larger expressions.
 */
#define CHT_GET_BUCK(_head, _PX, _key) \
	(_PX##_hash_key(_key) & ((_head)->hash_size - 1))
#define CHT_GET_BUCK_OBJ(_head, _PX, _obj) \
	(_PX##_hash_obj(_obj) & ((_head)->hash_size - 1))
/* First item of bucket @idx (usable as an lvalue) */
#define CHT_FIRST(_head, idx) _CHT_FIRST((_head)->ptr, idx)
/* Same as CHT_FIRST(), but operating on a raw bucket array @_ptr */
#define _CHT_FIRST(_ptr, idx) (_ptr)[idx]
/*
 * Looks up an item by @_key: walks the bucket selected by
 * <_PX>_hash_key(_key) and stops at the first item for which
 * <_PX>_cmp(_key, item) is non-zero.
 * @_ret is set to the found item or NULL.
 */
#define CHT_SLIST_FIND(_head, _PX, _key, _ret) do { \
uint32_t _buck = CHT_GET_BUCK(_head, _PX, _key); \
_ret = CHT_FIRST(_head, _buck); \
for ( ; _ret != NULL; _ret = _PX##_next(_ret)) { \
if (_PX##_cmp(_key, (_ret))) \
break; \
} \
} while(0)
/*
 * Looks up an item equal to the object @_obj: same as CHT_SLIST_FIND(),
 * but the bucket is selected by <_PX>_hash_obj(_obj) and items are
 * compared via <_PX>_cmp(_obj, item).
 * @_ret is set to the found item or NULL.
 */
#define CHT_SLIST_FIND_BYOBJ(_head, _PX, _obj, _ret) do { \
uint32_t _buck = CHT_GET_BUCK_OBJ(_head, _PX, _obj); \
_ret = CHT_FIRST(_head, _buck); \
for ( ; _ret != NULL; _ret = _PX##_next(_ret)) { \
if (_PX##_cmp(_obj, _ret)) \
break; \
} \
} while(0)
/*
 * Inserts @_obj at the head of the bucket selected by
 * <_PX>_hash_obj(_obj) and bumps the items counter.
 * No check for duplicates is performed.
 */
#define CHT_SLIST_INSERT_HEAD(_head, _PX, _obj) do { \
uint32_t _buck = CHT_GET_BUCK_OBJ(_head, _PX, _obj); \
_PX##_next(_obj) = CHT_FIRST(_head, _buck); \
CHT_FIRST(_head, _buck) = _obj; \
(_head)->items_count++; \
} while(0)
/*
 * Removes the first item matching @_key (per <_PX>_cmp(_key, item))
 * from the bucket selected by <_PX>_hash_key(_key).
 * @_ret is set to the removed item or NULL if nothing matched; the
 * items counter is decremented only when an item was removed.
 * _tmp tracks the predecessor of the current item during the walk.
 */
#define CHT_SLIST_REMOVE(_head, _PX, _key, _ret) do { \
typeof(*(_head)->ptr) _tmp; \
uint32_t _buck = CHT_GET_BUCK(_head, _PX, _key); \
_ret = CHT_FIRST(_head, _buck); \
_tmp = NULL; \
for ( ; _ret != NULL; _tmp = _ret, _ret = _PX##_next(_ret)) { \
if (_PX##_cmp(_key, _ret)) \
break; \
} \
if (_ret != NULL) { \
if (_tmp == NULL) \
CHT_FIRST(_head, _buck) = _PX##_next(_ret); \
else \
_PX##_next(_tmp) = _PX##_next(_ret); \
(_head)->items_count--; \
} \
} while(0)
/*
 * Same as CHT_SLIST_REMOVE(), but the bucket is selected by
 * <_PX>_hash_obj(_obj) and items are compared via <_PX>_cmp(_obj, item).
 */
#define CHT_SLIST_REMOVE_BYOBJ(_head, _PX, _obj, _ret) do { \
typeof(*(_head)->ptr) _tmp; \
uint32_t _buck = CHT_GET_BUCK_OBJ(_head, _PX, _obj); \
_ret = CHT_FIRST(_head, _buck); \
_tmp = NULL; \
for ( ; _ret != NULL; _tmp = _ret, _ret = _PX##_next(_ret)) { \
if (_PX##_cmp(_obj, _ret)) \
break; \
} \
if (_ret != NULL) { \
if (_tmp == NULL) \
CHT_FIRST(_head, _buck) = _PX##_next(_ret); \
else \
_PX##_next(_tmp) = _PX##_next(_ret); \
(_head)->items_count--; \
} \
} while(0)
/*
 * Iterates over all items of the table, bucket by bucket, assigning
 * each item to @_x. The loop body has to be followed by
 * CHT_SLIST_FOREACH_END, which closes the outer (per-bucket) loop.
 * Note that "break" in the body only leaves the current bucket.
 */
#define CHT_SLIST_FOREACH(_head, _PX, _x) \
for (uint32_t _i = 0; _i < (_head)->hash_size; _i++) { \
for (_x = CHT_FIRST(_head, _i); _x; _x = _PX##_next(_x))
#define CHT_SLIST_FOREACH_END }
/*
 * Moves all items into the new bucket array @_new_void_ptr consisting
 * of @_new_hsize buckets and makes @_head use it.
 * Every item is re-hashed via <_PX>_hash_obj() and inserted at the head
 * of its new bucket. On completion @_new_void_ptr is overwritten with
 * the OLD bucket array pointer, so it has to be an lvalue.
 *
 * Presumably the new array has to be zero-filled by the caller, as the
 * buckets are used as list heads without being initialized here - TODO
 * confirm against callers.
 * Note: declares local variables and expands to multiple statements,
 * so it can be used only once per scope.
 */
#define CHT_SLIST_RESIZE(_head, _PX, _new_void_ptr, _new_hsize) \
uint32_t _new_idx; \
typeof((_head)->ptr) _new_ptr = (void *)_new_void_ptr; \
typeof(*(_head)->ptr) _x, _y; \
for (uint32_t _old_idx = 0; _old_idx < (_head)->hash_size; _old_idx++) {\
_x = CHT_FIRST(_head, _old_idx); \
_y = _x; \
while (_y != NULL) { \
_y = _PX##_next(_x); \
_new_idx = _PX##_hash_obj(_x) & (_new_hsize - 1);\
_PX##_next(_x) = _CHT_FIRST(_new_ptr, _new_idx);\
_CHT_FIRST(_new_ptr, _new_idx) = _x; \
_x = _y; \
} \
} \
(_head)->hash_size = _new_hsize; \
_new_void_ptr = (void *)(_head)->ptr; \
(_head)->ptr = _new_ptr;
/* bitmasks */
/*
 * Head of an index allocator backed by a bitmask: one bit per index,
 * stored in an array of u_long blocks.
 */
struct bitmask_head {
uint16_t free_off; /* index of the first potentially free block */
uint16_t blocks; /* number of 4/8-byte blocks in the index */
uint32_t items_count; /* total number of items */
u_long *idx; /* bitmask storage, provided by the caller */
};
/* Size in bytes of the bitmask storage needed for @items items */
size_t bitmask_get_size(uint32_t items);
/* New number of items the bitmask should be grown to, 0 if none */
uint32_t bitmask_get_resize_items(const struct bitmask_head *nh);
/* Non-zero if the bitmask should be grown */
int bitmask_should_resize(const struct bitmask_head *bh);
/* Switches @bh to the new storage, returning the old one via @pidx */
void bitmask_swap(struct bitmask_head *bh, void *new_idx, uint32_t new_items, void **pidx);
/* Initializes @bh (and the storage @idx, if not NULL) */
void bitmask_init(struct bitmask_head *bh, void *idx, uint32_t num_items);
/* Initializes @new_idx and copies the current state into it; 0 on success */
int bitmask_copy(const struct bitmask_head *bi, void *new_idx, uint32_t new_items);
/* Allocates a free index, storing it in @pidx; 0 on success */
int bitmask_alloc_idx(struct bitmask_head *bi, uint16_t *pidx);
/* Returns index @idx to the set; 0 on success */
int bitmask_free_idx(struct bitmask_head *bi, uint16_t idx);
#endif

96
sys/net/route/nhop_var.h Normal file
View file

@ -0,0 +1,96 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* This header file contains private definitions for nexthop routing.
*
* Header is not intended to be included by the code external to the
* routing subsystem.
*/
#ifndef _NET_ROUTE_NHOP_VAR_H_
#define _NET_ROUTE_NHOP_VAR_H_
/*
 * define nhop hash table
 *
 * Instantiates "struct nhops_head" and provides the nhops_* accessors
 * the CHT_SLIST_* macros expect for the "nhops" prefix.
 */
struct nhop_priv;
CHT_SLIST_DEFINE(nhops, struct nhop_priv);
/* produce hash value for an object */
#define nhops_hash_obj(_obj) hash_priv(_obj)
/* compare two objects */
#define nhops_cmp(_one, _two) cmp_priv(_one, _two)
/* next object accessor */
#define nhops_next(_obj) (_obj)->nh_next
/*
* Nexthop control block.
* Bundles the nexthop hash table head, the nexthop index head, the
* control rwlock, a back pointer to the owning rib_head and an epoch
* context.
*/
struct nh_control {
struct nhops_head nh_head; /* hash table head */
struct bitmask_head nh_idx_head; /* nhop index head */
struct rwlock ctl_lock; /* overall ctl lock */
struct rib_head *ctl_rh; /* pointer back to rnh */
struct epoch_context ctl_epoch_ctx; /* epoch ctl helper */
};
/*
* Locking helpers: thin wrappers applying rwlock(9) primitives to the
* ctl_lock member of the struct nh_control pointed to by @ctl.
*/
#define NHOPS_WLOCK(ctl) rw_wlock(&(ctl)->ctl_lock)
#define NHOPS_RLOCK(ctl) rw_rlock(&(ctl)->ctl_lock)
#define NHOPS_WUNLOCK(ctl) rw_wunlock(&(ctl)->ctl_lock)
#define NHOPS_RUNLOCK(ctl) rw_runlock(&(ctl)->ctl_lock)
#define NHOPS_LOCK_INIT(ctl) rw_init(&(ctl)->ctl_lock, "nhop_ctl")
#define NHOPS_LOCK_DESTROY(ctl) rw_destroy(&(ctl)->ctl_lock)
#define NHOPS_WLOCK_ASSERT(ctl) rw_assert(&(ctl)->ctl_lock, RA_WLOCKED)
/*
* Control plane-only nhop data.
* The dataplane counterpart (struct nhop_object) is only forward-declared
* here and is reachable through the nh member.
*/
struct nhop_object;
struct nhop_priv {
uint32_t nh_idx; /* nexthop index */
uint8_t nh_family; /* address family of the lookup */
uint16_t nh_type; /* nexthop type */
void *cb_func; /* function handling additional rewrite caps */
u_int nh_refcnt; /* number of references, refcount(9) */
u_int nh_linked; /* refcount(9), == 2 if linked to the list */
int rt_flags; /* routing flags for the control plane */
struct nhop_object *nh; /* backreference to the dataplane nhop */
struct nh_control *nh_control; /* backreference to the nexthop control block */
struct nhop_priv *nh_next; /* hash table membership */
struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
};
#define NH_IS_PINNED(_nh) ((_nh)->nh_priv->rt_flags & RTF_PINNED)
/* nhop.c */
struct nhop_priv *find_nhop(struct nh_control *ctl,
const struct nhop_priv *nh_priv);
int link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
struct nhop_priv *unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
/* nhop_ctl.c */
int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two);
#endif

65
sys/net/route/route_ctl.c Normal file
View file

@ -0,0 +1,65 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/shared.h>
#include <netinet/in.h>
#include <vm/uma.h>
/*
* This file contains control plane routing tables functions.
*
* All functions assume they are called within the network epoch.
*/

View file

@ -0,0 +1,83 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/shared.h>
#include <net/vnet.h>
/*
* RIB helper functions.
*/
/*
 * Walks the routing table identified by @af and @fibnum, invoking
 * @wa_f with @arg through the table's rnh_walktree method.
 *
 * Silently does nothing if no such table exists.
 * The RIB read lock is held for the whole traversal.
 */
void
rib_walk(int af, u_int fibnum, rt_walktree_f_t *wa_f, void *arg)
{
	RIB_RLOCK_TRACKER;
	struct rib_head *rh;

	rh = rt_tables_get_rnh(fibnum, af);
	if (rh != NULL) {
		RIB_RLOCK(rh);
		rh->rnh_walktree(&rh->head, (walktree_f_t *)wa_f, arg);
		RIB_RUNLOCK(rh);
	}
}

68
sys/net/route/shared.h Normal file
View file

@ -0,0 +1,68 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Contains various definitions shared between the parts of a routing subsystem.
*
* Header is not intended to be included by the code external to the
* routing subsystem.
*/
#ifndef _NET_ROUTE_SHARED_H_
#define _NET_ROUTE_SHARED_H_
#ifdef RTDEBUG
#define DPRINTF(_fmt, ...) printf("%s: " _fmt "\n", __func__ , ## __VA_ARGS__)
#else
#define DPRINTF(_fmt, ...)
#endif
struct rib_head;
/* Nexthops */
void nhops_init(void);
int nhops_init_rib(struct rib_head *rh);
void nhops_destroy_rib(struct rib_head *rh);
int nhop_ref_object(struct nhop_object *nh);
int nhop_ref_any(struct nhop_object *nh);
void nhop_free_any(struct nhop_object *nh);
void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type);
void nhop_set_rtflags(struct nhop_object *nh, int rt_flags);
int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_object **nh_ret);
int nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig,
struct rt_addrinfo *info, struct nhop_object **pnh_priv);
void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu);
int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
#endif

View file

@ -32,6 +32,10 @@
#ifndef _NET_ROUTE_VAR_H_
#define _NET_ROUTE_VAR_H_
struct nh_control;
typedef int rnh_preadd_entry_f_t(u_int fibnum, const struct sockaddr *addr,
const struct sockaddr *mask, struct nhop_object *nh);
struct rib_head {
struct radix_head head;
rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */
@ -41,6 +45,7 @@ struct rib_head {
rn_walktree_t *rnh_walktree; /* traverse tree */
rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
rn_close_t *rnh_close; /*do something when the last ref drops*/
rnh_preadd_entry_f_t *rnh_preadd; /* hook to alter record prior to insertion */
rt_gen_t rnh_gen; /* generation counter */
int rnh_multipath; /* multipath capable ? */
struct radix_node rnh_nodes[3]; /* empty tree for common case */
@ -51,6 +56,7 @@ struct rib_head {
u_int rib_fibnum; /* fib number */
struct callout expire_callout; /* Callout for expiring dynamic routes */
time_t next_expire; /* Next expire run ts */
struct nh_control *nh_control; /* nexthop subsystem data */
};
#define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker
@ -90,6 +96,44 @@ _Static_assert(__offsetof(struct route, ro_dst) == __offsetof(_ro_new, _dst_new)
struct rib_head *rt_tables_get_rnh(int fib, int family);
void rt_mpath_init_rnh(struct rib_head *rnh);
VNET_PCPUSTAT_DECLARE(struct rtstat, rtstat);
#define RTSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct rtstat, rtstat, name, (val))
#define RTSTAT_INC(name) RTSTAT_ADD(name, 1)
/*
* With the split between the routing entry and the nexthop,
* rt_flags has to be split between these 2 entries. As rtentry
* mostly contains prefix data and is thought to be generic enough
* so one can transparently change the nexthop pointer w/o requiring
* any other rtentry changes, most of rt_flags shifts to the particular nexthop.
*
* Flag ownership after the split:
* RTF_UP: rtentry, as an indication that it is linked.
* RTF_HOST: rtentry, nhop. The latter indication is needed for the datapath
* RTF_DYNAMIC: nhop, to make rtentry generic.
* RTF_MODIFIED: nhop, to make rtentry generic. (legacy)
* -- "native" path (nhop) properties:
* RTF_GATEWAY, RTF_STATIC, RTF_PROTO1, RTF_PROTO2, RTF_PROTO3, RTF_FIXEDMTU,
* RTF_PINNED, RTF_REJECT, RTF_BLACKHOLE, RTF_BROADCAST
*/
/* Nexthop rt flags mask */
#define NHOP_RT_FLAG_MASK (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_DYNAMIC | \
RTF_MODIFIED | RTF_STATIC | RTF_BLACKHOLE | RTF_PROTO1 | RTF_PROTO2 | \
RTF_PROTO3 | RTF_FIXEDMTU | RTF_PINNED | RTF_BROADCAST)
/* rtentry rt flag mask */
#define RTE_RT_FLAG_MASK (RTF_UP | RTF_HOST)
/*
* Nexthop selection.
*
* _NH2MP() casts a nexthop pointer to a nexthop group pointer.
* _SELECT_NHOP() picks the group member nhops[flowid % mp_size].
* _RT_SELECT_NHOP() yields @_nh itself unless NH_IS_MULTIPATH(@_nh) is
* true, in which case the member is chosen by _SELECT_NHOP().
* RT_SELECT_NHOP() applies _RT_SELECT_NHOP() to the rt_nhop of @_rt.
*
* Note: the macro arguments are expanded more than once, so they must
* not have side effects.
*/
#define _NH2MP(_nh) ((struct nhgrp_object *)(_nh))
#define _SELECT_NHOP(_nh, _flowid) \
(_NH2MP(_nh))->nhops[(_flowid) % (_NH2MP(_nh))->mp_size]
#define _RT_SELECT_NHOP(_nh, _flowid) \
((!NH_IS_MULTIPATH(_nh)) ? (_nh) : _SELECT_NHOP(_nh, _flowid))
#define RT_SELECT_NHOP(_rt, _flowid) _RT_SELECT_NHOP((_rt)->rt_nhop, _flowid)
/* rte<>nhop translation */
static inline uint16_t
fib_rte_to_nh_flags(int rt_flags)

View file

@ -77,6 +77,7 @@
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
#include <net/route/nhop.h>
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
@ -1076,6 +1077,7 @@ rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
out->rmx_mtu = rt->rt_mtu;
out->rmx_weight = rt->rt_weight;
out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
out->rmx_nhidx = nhop_get_idx(rt->rt_nhop);
/* Kernel -> userland timebase conversion. */
out->rmx_expire = rt->rt_expire ?
rt->rt_expire - time_uptime + time_second : 0;
@ -2025,7 +2027,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
namelen--;
if (req->newptr)
return (EPERM);
if (name[1] == NET_RT_DUMP) {
if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP) {
if (namelen == 3)
fib = req->td->td_proc->p_fibnum;
else if (namelen == 4)
@ -2092,7 +2094,25 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
error = EAFNOSUPPORT;
}
break;
case NET_RT_NHOP:
	/* Allow dumping one specific af/fib at a time */
	if (namelen < 4) {
		error = EINVAL;
		break;
	}
	fib = name[3];
	/*
	 * Valid fib numbers are 0 .. rt_numfibs - 1, so
	 * rt_numfibs itself has to be rejected as well.
	 */
	if (fib < 0 || fib >= rt_numfibs) {
		error = EINVAL;
		break;
	}
	rnh = rt_tables_get_rnh(fib, af);
	if (rnh == NULL) {
		error = EAFNOSUPPORT;
		break;
	}
	if (w.w_op == NET_RT_NHOP)
		error = nhops_dump_sysctl(rnh, w.w_req);
	break;
case NET_RT_IFLIST:
case NET_RT_IFLISTL:
error = sysctl_iflist(af, &w);

View file

@ -49,6 +49,8 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@ -60,59 +62,49 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_fib.h>
#ifdef INET
static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_basic *pnh4);
static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_extended *pnh4);
#define RNTORT(p) ((struct rtentry *)(p))
static void
fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_basic *pnh4)
{
struct sockaddr_in *gw;
if ((flags & NHR_IFAIF) != 0)
pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
pnh4->nh_ifp = nh->nh_ifa->ifa_ifp;
else
pnh4->nh_ifp = nh->nh_ifp;
pnh4->nh_mtu = nh->nh_mtu;
if (nh->nh_flags & NHF_GATEWAY)
pnh4->nh_addr = nh->gw4_sa.sin_addr;
else
pnh4->nh_ifp = rte->rt_ifp;
pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
if (rte->rt_flags & RTF_GATEWAY) {
gw = (struct sockaddr_in *)rte->rt_gateway;
pnh4->nh_addr = gw->sin_addr;
} else
pnh4->nh_addr = dst;
/* Set flags */
pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
gw = (struct sockaddr_in *)rt_key(rte);
if (gw->sin_addr.s_addr == 0)
pnh4->nh_flags |= NHF_DEFAULT;
pnh4->nh_flags = nh->nh_flags;
/* TODO: Handle RTF_BROADCAST here */
}
static void
fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_extended *pnh4)
{
struct sockaddr_in *gw;
if ((flags & NHR_IFAIF) != 0)
pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
pnh4->nh_ifp = nh->nh_ifa->ifa_ifp;
else
pnh4->nh_ifp = nh->nh_ifp;
pnh4->nh_mtu = nh->nh_mtu;
if (nh->nh_flags & NHF_GATEWAY)
pnh4->nh_addr = nh->gw4_sa.sin_addr;
else
pnh4->nh_ifp = rte->rt_ifp;
pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
if (rte->rt_flags & RTF_GATEWAY) {
gw = (struct sockaddr_in *)rte->rt_gateway;
pnh4->nh_addr = gw->sin_addr;
} else
pnh4->nh_addr = dst;
/* Set flags */
pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
gw = (struct sockaddr_in *)rt_key(rte);
if (gw->sin_addr.s_addr == 0)
pnh4->nh_flags |= NHF_DEFAULT;
pnh4->nh_ia = ifatoia(rte->rt_ifa);
pnh4->nh_flags = nh->nh_flags;
pnh4->nh_ia = ifatoia(nh->nh_ifa);
pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
}
@ -135,7 +127,7 @@ fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in sin;
struct rtentry *rte;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
@ -150,10 +142,10 @@ fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rte = RNTORT(rn);
nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(rte->rt_ifp)) {
fib4_rte_to_nh_basic(rte, dst, flags, pnh4);
if (RT_LINK_IS_UP(nh->nh_ifp)) {
fib4_rte_to_nh_basic(nh, dst, flags, pnh4);
RIB_RUNLOCK(rh);
return (0);
@ -185,6 +177,7 @@ fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
struct radix_node *rn;
struct sockaddr_in sin;
struct rtentry *rte;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
@ -207,9 +200,10 @@ fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
return (ENOENT);
}
#endif
nh = rte->rt_nhop;
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(rte->rt_ifp)) {
fib4_rte_to_nh_extended(rte, dst, flags, pnh4);
if (RT_LINK_IS_UP(nh->nh_ifp)) {
fib4_rte_to_nh_extended(nh, dst, flags, pnh4);
if ((flags & NHR_REF) != 0) {
/* TODO: lwref on egress ifp's ? */
}
@ -229,4 +223,138 @@ fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4)
}
/*
* Looks up path in fib @fibnum specified by @dst.
* Returns path nexthop on success. Nexthop is safe to use
* within the current network epoch. If longer lifetime is required,
* one needs to pass NHR_REF as a flag. This will return referenced
* nexthop.
*/
struct nhop_object *
fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
uint32_t flags, uint32_t flowid)
{
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
if (rh == NULL)
return (NULL);
/* Prepare lookup key */
struct sockaddr_in sin4;
memset(&sin4, 0, sizeof(sin4));
sin4.sin_family = AF_INET;
sin4.sin_len = sizeof(struct sockaddr_in);
sin4.sin_addr = dst;
nh = NULL;
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
if (rt_mpath_next(rt) != NULL)
rt = rt_mpath_selectrte(rt, flowid);
#endif
nh = rt->rt_nhop;
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(nh->nh_ifp)) {
if (flags & NHR_REF)
nhop_ref_object(nh);
RIB_RUNLOCK(rh);
return (nh);
}
}
RIB_RUNLOCK(rh);
RTSTAT_INC(rts_unreach);
return (NULL);
}
/*
 * Evaluates the uRPF condition against a single nexthop.
 *
 * With @src_if given, succeeds only if the nexthop nh_aifp equals it.
 * Without @src_if, succeeds unless NHR_NODEFAULT is requested in @flags
 * and the nexthop carries NHF_DEFAULT.
 *
 * Returns 1 on success, 0 otherwise.
 */
static inline int
check_urpf(const struct nhop_object *nh, uint32_t flags,
    const struct ifnet *src_if)
{

	if (src_if != NULL)
		return (nh->nh_aifp == src_if);

	/* No interface constraint: any route, or any non-default one */
	if ((flags & NHR_NODEFAULT) == 0)
		return (1);
	return ((nh->nh_flags & NHF_DEFAULT) == 0);
}
#ifdef RADIX_MPATH
/*
 * Multipath flavour of check_urpf(): follows the chain produced by
 * rt_mpath_next() starting at @rt and succeeds as soon as the nexthop
 * of any entry passes check_urpf().
 *
 * Returns 1 if some entry matches, 0 if the chain is exhausted.
 */
static inline int
check_urpf_mpath(struct rtentry *rt, uint32_t flags,
    const struct ifnet *src_if)
{
	struct rtentry *cur;

	for (cur = rt; cur != NULL; cur = rt_mpath_next(cur)) {
		if (check_urpf(cur->rt_nhop, flags, src_if) != 0)
			return (1);
	}
	return (0);
}
#endif
/*
* Performs reverse path forwarding lookup.
* If @src_if is non-zero, verifies that at least 1 path goes via
* this interface.
* If @src_if is zero, verifies that route exist.
* if @flags contains NHR_NOTDEFAULT, do not consider default route.
*
* Returns 1 if route matching conditions is found, 0 otherwise.
*/
int
fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
uint32_t flags, const struct ifnet *src_if)
{
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
int ret;
KASSERT((fibnum < rt_numfibs), ("fib4_check_urpf: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
if (rh == NULL)
return (0);
/* Prepare lookup key */
struct sockaddr_in sin4;
memset(&sin4, 0, sizeof(sin4));
sin4.sin_len = sizeof(struct sockaddr_in);
sin4.sin_addr = dst;
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
ret = check_urpf_mpath(rt, flags, src_if);
#else
ret = check_urpf(rt->rt_nhop, flags, src_if);
#endif
RIB_RUNLOCK(rh);
return (ret);
}
RIB_RUNLOCK(rh);
return (0);
}
#endif

View file

@ -58,5 +58,9 @@ int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
uint32_t flowid, struct nhop4_extended *pnh4);
void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4);
struct nhop_object *fib4_lookup(uint32_t fibnum, struct in_addr dst,
uint32_t scopeid, uint32_t flags, uint32_t flowid);
int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
uint32_t flags, const struct ifnet *src_if);
#endif

View file

@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/route/shared.h>
#include <net/vnet.h>
#include <netinet/in.h>
@ -56,6 +58,67 @@ extern int in_inithead(void **head, int off, u_int fibnum);
extern int in_detachhead(void **head, int off);
#endif
/*
 * IPv4 pre-insertion hook (installed as rnh_preadd): adjusts nexthop
 * @nh before the route for @addr/@mask is added.
 *
 *  - marks host routes to a broadcast address as broadcast;
 *  - makes the nexthop MTU default to, and never exceed, the
 *    interface MTU;
 *  - tags the nexthop of a default route with NHF_DEFAULT;
 *  - assigns the basic IPv4 nexthop type if none is set yet.
 *
 * @mask may be NULL; such a route is never treated as default.
 * @fibnum is not used. Always returns 0.
 */
static int
rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
    struct nhop_object *nh)
{
	const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr;
	const struct sockaddr_in *mask4 = (const struct sockaddr_in *)mask;
	uint16_t nh_type;
	int rt_flags;

	/* XXX: RTF_LOCAL && RTF_MULTICAST */

	rt_flags = nhop_get_rtflags(nh);

	if (rt_flags & RTF_HOST) {
		/*
		 * Backward compatibility:
		 * if the destination is broadcast,
		 * mark route as broadcast.
		 * This behavior was useful when route cloning
		 * was in place, so there was an explicit cloned
		 * route for every broadcasted address.
		 * Currently (2020-04) there is no kernel machinery
		 * to do route cloning, though someone might explicitly
		 * add these routes to support some cases with active-active
		 * load balancing. Given that, retain this support.
		 */
		if (in_broadcast(addr4->sin_addr, nh->nh_ifp)) {
			rt_flags |= RTF_BROADCAST;
			nhop_set_rtflags(nh, rt_flags);
			nh->nh_flags |= NHF_BROADCAST;
		}
	}

	/*
	 * Check route MTU:
	 * inherit interface MTU if not set or
	 * clamp it if it is too large.
	 */
	if (nh->nh_mtu == 0 || nh->nh_mtu > nh->nh_ifp->if_mtu)
		nh->nh_mtu = nh->nh_ifp->if_mtu;

	/*
	 * Ensure that default route nhop has special flag.
	 * Do not dereference a missing netmask.
	 */
	if ((rt_flags & RTF_HOST) == 0 && mask4 != NULL &&
	    mask4->sin_addr.s_addr == 0)
		nh->nh_flags |= NHF_DEFAULT;

	/* Set nhop type to basic per-AF nhop */
	if (nhop_get_type(nh) == 0) {
		if (nh->nh_flags & NHF_GATEWAY)
			nh_type = NH_TYPE_IPV4_ETHER_NHOP;
		else
			nh_type = NH_TYPE_IPV4_ETHER_RSLV;

		nhop_set_type(nh, nh_type);
	}

	return (0);
}
/*
* Do what we need to do when inserting a route.
*/
@ -126,6 +189,7 @@ in_inithead(void **head, int off, u_int fibnum)
if (rh == NULL)
return (0);
rh->rnh_preadd = rib4_preadd;
rh->rnh_addaddr = in_addroute;
#ifdef RADIX_MPATH
rt_mpath_init_rnh(rh);

View file

@ -50,6 +50,8 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@ -68,94 +70,63 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#ifdef INET6
static void fib6_rte_to_nh_extended(struct rtentry *rte,
static void fib6_rte_to_nh_extended(const struct nhop_object *nh,
const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_basic *pnh6);
static struct ifnet *fib6_get_ifaifp(struct rtentry *rte);
#define RNTORT(p) ((struct rtentry *)(p))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst);
/*
* Gets real interface for the @rte.
* Returns rt_ifp for !IFF_LOOPBACK routers.
* Extracts "real" address interface from interface address
* loopback routes.
*/
static struct ifnet *
fib6_get_ifaifp(struct rtentry *rte)
{
struct ifnet *ifp;
struct sockaddr_dl *sdl;
ifp = rte->rt_ifp;
if ((ifp->if_flags & IFF_LOOPBACK) &&
rte->rt_gateway->sa_family == AF_LINK) {
sdl = (struct sockaddr_dl *)rte->rt_gateway;
return (ifnet_byindex(sdl->sdl_index));
}
return (ifp);
}
static void
fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_basic *pnh6)
{
struct sockaddr_in6 *gw;
/* Do explicit nexthop zero unless we're copying it */
memset(pnh6, 0, sizeof(*pnh6));
if ((flags & NHR_IFAIF) != 0)
pnh6->nh_ifp = fib6_get_ifaifp(rte);
pnh6->nh_ifp = nh->nh_aifp;
else
pnh6->nh_ifp = rte->rt_ifp;
pnh6->nh_ifp = nh->nh_ifp;
pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
if (rte->rt_flags & RTF_GATEWAY) {
pnh6->nh_mtu = nh->nh_mtu;
if (nh->nh_flags & NHF_GATEWAY) {
/* Return address with embedded scope. */
gw = (struct sockaddr_in6 *)rte->rt_gateway;
pnh6->nh_addr = gw->sin6_addr;
pnh6->nh_addr = nh->gw6_sa.sin6_addr;
} else
pnh6->nh_addr = *dst;
/* Set flags */
pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
gw = (struct sockaddr_in6 *)rt_key(rte);
if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
pnh6->nh_flags |= NHF_DEFAULT;
pnh6->nh_flags = nh->nh_flags;
}
static void
fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst,
fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_extended *pnh6)
{
struct sockaddr_in6 *gw;
/* Do explicit nexthop zero unless we're copying it */
memset(pnh6, 0, sizeof(*pnh6));
if ((flags & NHR_IFAIF) != 0)
pnh6->nh_ifp = fib6_get_ifaifp(rte);
pnh6->nh_ifp = nh->nh_aifp;
else
pnh6->nh_ifp = rte->rt_ifp;
pnh6->nh_ifp = nh->nh_ifp;
pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
if (rte->rt_flags & RTF_GATEWAY) {
pnh6->nh_mtu = nh->nh_mtu;
if (nh->nh_flags & NHF_GATEWAY) {
/* Return address with embedded scope. */
gw = (struct sockaddr_in6 *)rte->rt_gateway;
pnh6->nh_addr = gw->sin6_addr;
pnh6->nh_addr = nh->gw6_sa.sin6_addr;
} else
pnh6->nh_addr = *dst;
/* Set flags */
pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
gw = (struct sockaddr_in6 *)rt_key(rte);
if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
pnh6->nh_flags |= NHF_DEFAULT;
pnh6->nh_ia = ifatoia6(rte->rt_ifa);
pnh6->nh_flags = nh->nh_flags;
pnh6->nh_ia = ifatoia6(nh->nh_ifa);
}
/*
@ -180,7 +151,7 @@ fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scope
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
struct rtentry *rte;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@ -198,10 +169,10 @@ fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scope
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rte = RNTORT(rn);
nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(rte->rt_ifp)) {
fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6);
if (RT_LINK_IS_UP(nh->nh_ifp)) {
fib6_rte_to_nh_basic(nh, &sin6.sin6_addr, flags, pnh6);
RIB_RUNLOCK(rh);
return (0);
}
@ -231,6 +202,7 @@ fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
struct radix_node *rn;
struct sockaddr_in6 sin6;
struct rtentry *rte;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@ -256,9 +228,10 @@ fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
return (ENOENT);
}
#endif
nh = rte->rt_nhop;
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(rte->rt_ifp)) {
fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags,
if (RT_LINK_IS_UP(nh->nh_ifp)) {
fib6_rte_to_nh_extended(nh, &sin6.sin6_addr, flags,
pnh6);
if ((flags & NHR_REF) != 0) {
/* TODO: Do lwref on egress ifp's */
@ -279,5 +252,145 @@ fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6)
}
/*
* Looks up path in fib @fibnum specified by @dst.
* Assumes scope is deembedded and provided in @scopeid.
*
* Returns path nexthop on success. Nexthop is safe to use
* within the current network epoch. If longer lifetime is required,
* one needs to pass NHR_REF as a flag. This will return referenced
* nexthop.
*/
struct nhop_object *
fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
uint32_t scopeid, uint32_t flags, uint32_t flowid)
{
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct nhop_object *nh;
struct sockaddr_in6 sin6;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
if (rh == NULL)
return (NULL);
/* TODO: radix changes */
//addr = *dst6;
/* Prepare lookup key */
memset(&sin6, 0, sizeof(sin6));
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_addr = *dst6;
/* Assume scopeid is valid and embed it directly */
if (IN6_IS_SCOPE_LINKLOCAL(dst6))
sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
if (rt_mpath_next(rt) != NULL)
rt = rt_mpath_selectrte(rt, flowid);
#endif
nh = rt->rt_nhop;
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(nh->nh_ifp)) {
if (flags & NHR_REF)
nhop_ref_object(nh);
RIB_RUNLOCK(rh);
return (nh);
}
}
RIB_RUNLOCK(rh);
RTSTAT_INC(rts_unreach);
return (NULL);
}
/*
 * Evaluates the uRPF condition against a single nexthop.
 *
 * With @src_if given, succeeds only if the nexthop nh_aifp equals it.
 * Without @src_if, succeeds unless NHR_NODEFAULT is requested in @flags
 * and the nexthop carries NHF_DEFAULT.
 *
 * Returns 1 on success, 0 otherwise.
 */
static inline int
check_urpf(const struct nhop_object *nh, uint32_t flags,
    const struct ifnet *src_if)
{

	if (src_if != NULL)
		return (nh->nh_aifp == src_if);

	/* No interface constraint: any route, or any non-default one */
	if ((flags & NHR_NODEFAULT) == 0)
		return (1);
	return ((nh->nh_flags & NHF_DEFAULT) == 0);
}
#ifdef RADIX_MPATH
/*
 * Multipath flavour of check_urpf(): follows the chain produced by
 * rt_mpath_next() starting at @rt and succeeds as soon as the nexthop
 * of any entry passes check_urpf().
 *
 * Returns 1 if some entry matches, 0 if the chain is exhausted.
 */
static inline int
check_urpf_mpath(struct rtentry *rt, uint32_t flags,
    const struct ifnet *src_if)
{
	struct rtentry *cur;

	for (cur = rt; cur != NULL; cur = rt_mpath_next(cur)) {
		if (check_urpf(cur->rt_nhop, flags, src_if) != 0)
			return (1);
	}
	return (0);
}
#endif
/*
* Performs reverse path forwarding lookup.
* If @src_if is non-zero, verifies that at least 1 path goes via
* this interface.
* If @src_if is zero, verifies that route exist.
* if @flags contains NHR_NOTDEFAULT, do not consider default route.
*
* Returns 1 if route matching conditions is found, 0 otherwise.
*/
int
fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
uint32_t scopeid, uint32_t flags, const struct ifnet *src_if)
{
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct in6_addr addr;
int ret;
KASSERT((fibnum < rt_numfibs), ("fib6_check_urpf: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
if (rh == NULL)
return (0);
addr = *dst6;
/* Assume scopeid is valid and embed it directly */
if (IN6_IS_SCOPE_LINKLOCAL(dst6))
addr.s6_addr16[1] = htons(scopeid & 0xffff);
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&addr, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
ret = check_urpf_mpath(rt, flags, src_if);
#else
ret = check_urpf(rt->rt_nhop, flags, src_if);
#endif
RIB_RUNLOCK(rh);
return (ret);
}
RIB_RUNLOCK(rh);
return (0);
}
#endif

View file

@ -58,5 +58,11 @@ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,
uint32_t scopeid, uint32_t flags, uint32_t flowid,
struct nhop6_extended *pnh6);
void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6);
struct nhop_object *fib6_lookup(uint32_t fibnum,
const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags,
uint32_t flowid);
int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
uint32_t scopeid, uint32_t flags, const struct ifnet *src_if);
#endif

View file

@ -82,6 +82,8 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/route/shared.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
@ -103,6 +105,43 @@ extern int in6_inithead(void **head, int off, u_int fibnum);
extern int in6_detachhead(void **head, int off);
#endif
/*
 * Pre-insertion hook for IPv6 routes (installed as rnh_preadd).
 *
 * Normalizes nexthop @nh before the route is added:
 *  - clamps/initializes the nexthop MTU from the interface link MTU,
 *  - marks the nexthop of a non-host route with an all-zero mask
 *    as NHF_DEFAULT,
 *  - assigns a nexthop type if none has been set yet.
 *
 * @fibnum and @addr are not used here.  Always returns 0.
 */
static int
rib6_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
    struct nhop_object *nh)
{
	const struct sockaddr_in6 *mask6;

	/* XXX: RTF_LOCAL */

	/*
	 * Route MTU: inherit the interface link MTU when unset,
	 * and cap it when it exceeds the link MTU.
	 */
	if (nh->nh_mtu == 0 || nh->nh_mtu > IN6_LINKMTU(nh->nh_ifp))
		nh->nh_mtu = IN6_LINKMTU(nh->nh_ifp);

	/* Ensure that default route nhop has special flag */
	mask6 = (const struct sockaddr_in6 *)mask;
	if ((nhop_get_rtflags(nh) & RTF_HOST) == 0 &&
	    IN6_IS_ADDR_UNSPECIFIED(&mask6->sin6_addr))
		nh->nh_flags |= NHF_DEFAULT;

	/* Set nexthop type unless one was already chosen. */
	if (nhop_get_type(nh) == 0) {
		nhop_set_type(nh, (nh->nh_flags & NHF_GATEWAY) ?
		    NH_TYPE_IPV6_ETHER_NHOP : NH_TYPE_IPV6_ETHER_RSLV);
	}

	return (0);
}
/*
* Do what we need to do when inserting a route.
*/
@ -169,6 +208,7 @@ in6_inithead(void **head, int off, u_int fibnum)
return (0);
rh->rnh_addaddr = in6_addroute;
rh->rnh_preadd = rib6_preadd;
#ifdef RADIX_MPATH
rt_mpath_init_rnh(rh);
#endif

View file

@ -416,6 +416,7 @@ struct sockproto {
#define NET_RT_IFMALIST 4 /* return multicast address list */
#define NET_RT_IFLISTL 5 /* Survey interface list, using 'l'en
* versions of msghdr structs. */
#define NET_RT_NHOP 6 /* dump routing nexthops */
#endif /* __BSD_VISIBLE */
/*

View file

@ -5,7 +5,7 @@
PROG= netstat
SRCS= if.c inet.c main.c mbuf.c mroute.c netisr.c nl_symbols.c route.c \
unix.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c \
unix.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c common.c nhops.c \
nl_defs.h
nl_symbols.c: nlist_symbols

140
usr.bin/netstat/common.c Normal file
View file

@ -0,0 +1,140 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <arpa/inet.h>
#include <ifaddrs.h>
#include <libutil.h>
#include <netdb.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <err.h>
#include <libxo/xo.h>
#include "netstat.h"
#include "common.h"
/*
 * Renders flag word @f as a compact string of one-letter symbols.
 *
 * @p points to a table terminated by an entry with a zero b_mask; for
 * every entry whose mask intersects @f the entry's b_val character is
 * appended, in table order.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  Output is truncated to what fits in that buffer, so an
 * oversized table cannot write past its end.
 */
const char *
fmt_flags(const struct bits *p, int f)
{
	static char name[33];
	char *const end = name + sizeof(name) - 1;	/* room for '\0' */
	char *flags;

	for (flags = name; p->b_mask != 0 && flags < end; p++) {
		if (p->b_mask & f)
			*flags++ = p->b_val;
	}
	*flags = '\0';

	return (name);
}
/*
 * Emits flag word @flags twice through libxo:
 *  - once in compact form, using caller-supplied xo format @format with
 *    the string produced by fmt_flags(),
 *  - once as a list named @tag_name holding the long name (b_name) of
 *    every table entry in @pbits whose mask is set in @flags.
 *
 * @pbits must be terminated by an entry with a zero b_mask.
 */
void
print_flags_generic(int flags, const struct bits *pbits, const char *format,
    const char *tag_name)
{
	char leaf_fmt[64];
	const struct bits *bit;

	xo_emit(format, fmt_flags(pbits, flags));

	/* Per-entry leaf-list format, e.g. "{le:<tag_name>/%s}". */
	snprintf(leaf_fmt, sizeof(leaf_fmt), "{le:%s/%%s}", tag_name);

	xo_open_list(tag_name);
	bit = pbits;
	while (bit->b_mask) {
		if (bit->b_mask & flags)
			xo_emit(leaf_fmt, bit->b_name);
		bit++;
	}
	xo_close_list(tag_name);
}
/*
 * Builds an interface-index -> interface-name lookup table.
 *
 * Walks the list returned by getifaddrs(3) and, for every AF_LINK
 * entry, records the interface name in the slot selected by the
 * link-level sdl_index.  The table is grown in multiples of 32 entries
 * and new slots are zero-filled, so an unused slot has an empty name.
 *
 * On return *@pifmap_size holds the number of slots in the table.
 * Returns the realloc()-allocated table, or NULL (with size 0) when no
 * AF_LINK entry was seen.  Exits the program if getifaddrs() or
 * realloc() fails.
 */
struct ifmap_entry *
prepare_ifmap(size_t *pifmap_size)
{
	int ifindex = 0, size;
	struct ifaddrs *ifap, *ifa;
	struct sockaddr_dl *sdl;
	struct ifmap_entry *ifmap = NULL;
	int ifmap_size = 0;

	/*
	 * Retrieve interface list at first
	 * since we need #ifindex -> if_xname match
	 */
	if (getifaddrs(&ifap) != 0)
		err(EX_OSERR, "getifaddrs");

	for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
		/* getifaddrs(3) may hand back entries with no address. */
		if (ifa->ifa_addr == NULL ||
		    ifa->ifa_addr->sa_family != AF_LINK)
			continue;

		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		ifindex = sdl->sdl_index;

		if (ifindex >= ifmap_size) {
			size = roundup(ifindex + 1, 32) *
			    sizeof(struct ifmap_entry);
			if ((ifmap = realloc(ifmap, size)) == NULL)
				errx(2, "realloc(%d) failed", size);
			/* Zero only the newly added tail of the table. */
			memset(&ifmap[ifmap_size], 0,
			    size - ifmap_size *
			    sizeof(struct ifmap_entry));

			ifmap_size = roundup(ifindex + 1, 32);
		}

		/* First name seen for an index wins. */
		if (*ifmap[ifindex].ifname != '\0')
			continue;

		strlcpy(ifmap[ifindex].ifname, ifa->ifa_name, IFNAMSIZ);
	}

	freeifaddrs(ifap);

	*pifmap_size = ifmap_size;

	return (ifmap);
}

58
usr.bin/netstat/common.h Normal file
View file

@ -0,0 +1,58 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1992, 1993
* Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)netstat.h 8.2 (Berkeley) 1/4/94
* $FreeBSD$
*/
#ifndef _NETSTAT_COMMON_H_
#define _NETSTAT_COMMON_H_
/*
 * One row of a flag description table (see rt_bits).
 * A table is an array of these terminated by a row with b_mask == 0.
 */
struct bits {
/* Bit mask tested against the flag word being printed. */
u_long b_mask;
/* One-character symbol appended by fmt_flags() when the mask matches. */
char b_val;
/* Long name emitted by print_flags_generic() when the mask matches. */
const char *b_name;
};
extern struct bits rt_bits[];
const char *fmt_flags(const struct bits *p, int f);
void print_flags_generic(int flags, const struct bits *pbits,
const char *format, const char *tag_name);
int print_sockaddr(const char *name, struct sockaddr *sa,
struct sockaddr *mask, int flags, int width);
/*
 * One slot of the table built by prepare_ifmap(); the slot position is
 * the interface index.  An empty name marks an unused slot.
 */
struct ifmap_entry {
/* Interface name, copied with a limit of IFNAMSIZ bytes. */
char ifname[IFNAMSIZ];
};
struct ifmap_entry *prepare_ifmap(size_t *ifmap_size);
#endif

View file

@ -214,6 +214,7 @@ int mflag; /* show memory stats */
int noutputs = 0; /* how much outputs before we exit */
int numeric_addr; /* show addresses numerically */
int numeric_port; /* show ports numerically */
int oflag; /* show nexthop objects*/
int Pflag; /* show TCP log ID */
static int pflag; /* show given protocol */
static int Qflag; /* show netisr information */
@ -248,7 +249,7 @@ main(int argc, char *argv[])
if (argc < 0)
exit(EXIT_FAILURE);
while ((ch = getopt(argc, argv, "46AaBbdF:f:ghI:iLlM:mN:nPp:Qq:RrSTsuWw:xz"))
while ((ch = getopt(argc, argv, "46AaBbdF:f:ghI:iLlM:mN:noPp:Qq:RrSTsuWw:xz"))
!= -1)
switch(ch) {
case '4':
@ -345,6 +346,9 @@ main(int argc, char *argv[])
case 'n':
numeric_addr = numeric_port = 1;
break;
case 'o':
oflag = 1;
break;
case 'P':
Pflag = 1;
break;
@ -494,6 +498,14 @@ main(int argc, char *argv[])
xo_finish();
exit(0);
}
if (oflag) {
xo_open_container("statistics");
nhops_print(fib, af);
xo_close_container("statistics");
xo_finish();
exit(0);
}
if (gflag) {
xo_open_container("statistics");

View file

@ -147,6 +147,10 @@ void rt_stats(void);
char *routename(struct sockaddr *, int);
const char *netname(struct sockaddr *, struct sockaddr *);
void routepr(int, int);
int p_sockaddr(const char *name, struct sockaddr *sa,
struct sockaddr *mask, int flags, int width);
const char *fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask,
int flags);
#ifdef NETGRAPH
void netgraphprotopr(u_long, const char *, int, int);
@ -157,3 +161,4 @@ void unixpr(u_long, u_long, u_long, u_long, u_long, bool *);
void mroutepr(void);
void mrt_stats(void);
void bpf_stats(char *);
void nhops_print(int fibnum, int af);

472
usr.bin/netstat/nhops.c Normal file
View file

@ -0,0 +1,472 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <netinet/in.h>
#include <netgraph/ng_socket.h>
#include <arpa/inet.h>
#include <ifaddrs.h>
#include <libutil.h>
#include <netdb.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <err.h>
#include <libxo/xo.h>
#include "netstat.h"
#include "common.h"
/* column widths; each followed by one space */
#ifndef INET6
#define WID_DST_DEFAULT(af) 18 /* width of destination column */
#define WID_GW_DEFAULT(af) 18 /* width of gateway column */
#define WID_IF_DEFAULT(af) (Wflag ? 10 : 8) /* width of netif column */
#else
#define WID_DST_DEFAULT(af) \
((af) == AF_INET6 ? (numeric_addr ? 33: 18) : 18)
#define WID_GW_DEFAULT(af) \
((af) == AF_INET6 ? (numeric_addr ? 29 : 18) : 18)
#define WID_IF_DEFAULT(af) ((af) == AF_INET6 ? 8 : (Wflag ? 10 : 8))
#endif /*INET6*/
static int wid_dst;
static int wid_gw;
static int wid_flags;
static int wid_pksent;
static int wid_mtu;
static int wid_if;
static int wid_nhidx;
static int wid_nhtype;
static int wid_refcnt;
static int wid_prepend;
/*
 * Flag description table for nexthop (NHF_*) flags: mask, one-letter
 * symbol and long name.  Terminated by the all-zero row.
 * Consumed by p_nhflags().
 */
static struct bits nh_bits[] = {
{ NHF_REJECT, 'R', "reject" },
{ NHF_BLACKHOLE,'B', "blackhole" },
{ NHF_REDIRECT, 'r', "redirect" },
{ NHF_GATEWAY, 'G', "gateway" },
{ NHF_DEFAULT, 'd', "default" },
{ NHF_BROADCAST,'b', "broadcast" },
{ 0 , 0, NULL }
};
/*
 * Printable nexthop type names, indexed by the nh_type field of the
 * nexthop record in print_nhop_entry_sysctl().
 * NOTE(review): presumably mirrors the kernel NH_TYPE_* numbering --
 * confirm against net/route/nhop.h.
 */
static char *nh_types[] = {
"empty", /* 0 */
"v4/resolve", /* 1 */
"v4/gw", /* 2 */
"v6/resolve", /* 3 */
"v6/gw" /* 4 */
};
/*
 * Cached printable data for one nexthop, filled by nhop_map_update().
 * An empty ifname marks a slot that has not been filled (see nhop_get()).
 */
struct nhop_entry {
/* Gateway column text as printed for this nexthop. */
char gw[64];
/* Name of the nexthop's interface. */
char ifname[IFNAMSIZ];
};
/*
 * Growable array of nhop_entry slots indexed by nexthop index.
 */
struct nhop_map {
/* realloc()-managed array of slots; NULL while size is 0. */
struct nhop_entry *ptr;
/* Number of slots currently allocated in ptr. */
size_t size;
};
static struct nhop_map global_nhop_map;
static void nhop_map_update(struct nhop_map *map, uint32_t idx,
char *gw, char *ifname);
static struct nhop_entry *nhop_get(struct nhop_map *map, uint32_t idx);
static struct ifmap_entry *ifmap;
static size_t ifmap_size;
/*
 * Formats the address held in @sa into @buf (at most @bufsize bytes).
 *
 * AF_INET and AF_INET6 addresses are rendered numerically with
 * inet_ntop(); any other family is rendered as "unknown:<family>".
 * If inet_ntop() fails (e.g. @buf is too small) @buf is set to the
 * empty string, so the caller never sees an unterminated buffer.
 */
static void
print_sockaddr_buf(char *buf, size_t bufsize, const struct sockaddr *sa)
{
	const char *res;

	switch (sa->sa_family) {
	case AF_INET:
		res = inet_ntop(AF_INET,
		    &((const struct sockaddr_in *)sa)->sin_addr,
		    buf, bufsize);
		break;
	case AF_INET6:
		res = inet_ntop(AF_INET6,
		    &((const struct sockaddr_in6 *)sa)->sin6_addr,
		    buf, bufsize);
		break;
	default:
		snprintf(buf, bufsize, "unknown:%d", sa->sa_family);
		return;
	}

	/* inet_ntop() leaves the buffer unspecified on failure. */
	if (res == NULL && bufsize > 0)
		buf[0] = '\0';
}
/*
 * Emits address string @addr as libxo field @name.
 *
 * A negative @width prints the address unpadded.  Otherwise the field
 * is padded to @width columns; with -W or numeric addresses the full
 * string is printed, else it is truncated to @width characters.
 *
 * Returns by how many characters the printed string overran @width
 * (0 when it fit, was truncated, or no width was requested).
 */
static int
print_addr(const char *name, const char *addr, int width)
{
	char fmt[128];
	int overrun;

	if (width < 0) {
		snprintf(fmt, sizeof(fmt), "{:%s/%%s} ", name);
		xo_emit(fmt, addr);
		return (0);
	}

	if (Wflag == 0 && !numeric_addr) {
		/* Narrow output: cut the address down to the column. */
		snprintf(fmt, sizeof(fmt), "{[:%d}{:%s/%%-.*s}{]:} ",
		    -width, name);
		xo_emit(fmt, width, addr);
		return (0);
	}

	snprintf(fmt, sizeof(fmt), "{[:%d}{:%s/%%s}{]:} ",
	    -width, name);
	xo_emit(fmt, addr);
	overrun = strlen(addr) - width;

	return (overrun < 0 ? 0 : overrun);
}
/*
 * Prints the column header line of the nexthop table.
 *
 * With -W the wide layout is used (Idx, Type, IFA, Gateway, Flags, Use,
 * Mtu, Netif, Addrif, Refcnt, Prepend); otherwise the narrow layout
 * (Idx, IFA, Gateway, Flags, Netif, Refcnt).  Column widths come from
 * the file-scope wid_* variables.  @af1 is unused.
 */
static void
print_nhop_header(int af1 __unused)
{

	if (Wflag) {
		xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} "
		    "{T:/%*.*s} {T:/%-*.*s} {T:/%*.*s} {T:/%*.*s} {T:/%*.*s} {T:/%*s}\n",
		    wid_nhidx, wid_nhidx, "Idx",
		    wid_nhtype, wid_nhtype, "Type",
		    wid_dst, wid_dst, "IFA",
		    wid_gw, wid_gw, "Gateway",
		    wid_flags, wid_flags, "Flags",
		    wid_pksent, wid_pksent, "Use",
		    wid_mtu, wid_mtu, "Mtu",
		    wid_if, wid_if, "Netif",
		    wid_if, wid_if, "Addrif",
		    wid_refcnt, wid_refcnt, "Refcnt",
		    wid_prepend, "Prepend");
	} else {
		/*
		 * The Refcnt column is sized by wid_refcnt, the same width
		 * the data rows use for the refcount field.
		 */
		xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} "
		    " {T:/%*s}\n",
		    wid_nhidx, wid_nhidx, "Idx",
		    wid_dst, wid_dst, "IFA",
		    wid_gw, wid_gw, "Gateway",
		    wid_flags, wid_flags, "Flags",
		    wid_if, wid_if, "Netif",
		    wid_refcnt, "Refcnt");
	}
}
/*
 * Stores the printable gateway @gw and interface name @ifname for
 * nexthop index @idx in @map, growing the map first if @idx does not
 * fit.
 *
 * Growth doubles the current size (starting at 32 slots) or, if that is
 * still too small, rounds @idx + 1 up to a multiple of 32.  New slots
 * are zero-filled.  Exits the program if realloc() fails.
 */
static void
nhop_map_update(struct nhop_map *map, uint32_t idx, char *gw, char *ifname)
{

	if (idx >= map->size) {
		uint32_t new_size;
		size_t sz;

		if (map->size == 0)
			new_size = 32;
		else
			new_size = map->size * 2;
		if (new_size <= idx)
			new_size = roundup(idx + 1, 32);

		sz = new_size * (sizeof(struct nhop_entry));
		if ((map->ptr = realloc(map->ptr, sz)) == NULL)
			/* size_t is not unsigned long everywhere: cast for %lu. */
			errx(2, "realloc(%lu) failed", (unsigned long)sz);

		/* Zero only the slots added by this growth step. */
		memset(&map->ptr[map->size], 0,
		    (new_size - map->size) * sizeof(struct nhop_entry));
		map->size = new_size;
	}

	strlcpy(map->ptr[idx].ifname, ifname, sizeof(map->ptr[idx].ifname));
	strlcpy(map->ptr[idx].gw, gw, sizeof(map->ptr[idx].gw));
}
static struct nhop_entry *
nhop_get(struct nhop_map *map, uint32_t idx)
{
if (idx >= map->size)
return (NULL);
if (*map->ptr[idx].ifname == '\0')
return (NULL);
return &map->ptr[idx];
}
/*
 * Prints one nexthop record as a libxo instance named @name.
 *
 * @rtm is the routing message header of the record and @nh the nexthop
 * data that follows it.  The address block (struct nhop_addrs) is
 * located nh->nh_len bytes past @nh and carries the offsets of the
 * gateway and source (IFA) socket addresses.
 *
 * Columns: index, [type], IFA, gateway, flags, [use, mtu], interface,
 * [address interface], refcount, [prepend]; bracketed columns are
 * printed only with -W.  The gateway/interface strings are also cached
 * in global_nhop_map under the nexthop index.
 */
static void
print_nhop_entry_sysctl(const char *name, struct rt_msghdr *rtm, struct nhop_external *nh)
{
	char buffer[128];
	char iface_name[128];
	int protrusion;
	char gw_addr[64];
	struct nhop_addrs *na;
	struct sockaddr *sa_gw, *sa_ifa;

	xo_open_instance(name);

	snprintf(buffer, sizeof(buffer), "{[:-%d}{:index/%%lu}{]:} ", wid_nhidx);
	/* The format consumes an unsigned long; widen the index explicitly. */
	xo_emit(buffer, (unsigned long)nh->nh_idx);

	if (Wflag) {
		const char *cp;

		/* Never index nh_types[] with a type this build does not know. */
		if (nh->nh_type < nitems(nh_types))
			cp = nh_types[nh->nh_type];
		else
			cp = "unknown";
		xo_emit("{t:type_str/%*s} ", wid_nhtype, cp);
	}

	/* Resolve the transmit interface name; "---" if the slot is empty. */
	memset(iface_name, 0, sizeof(iface_name));
	if (nh->ifindex < (uint32_t)ifmap_size) {
		strlcpy(iface_name, ifmap[nh->ifindex].ifname,
		    sizeof(iface_name));
		if (*iface_name == '\0')
			strlcpy(iface_name, "---", sizeof(iface_name));
	}

	na = (struct nhop_addrs *)((char *)nh + nh->nh_len);
	sa_gw = (struct sockaddr *)((char *)na + na->gw_sa_off);
	sa_ifa = (struct sockaddr *)((char *)na + na->src_sa_off);

	protrusion = p_sockaddr("ifa", sa_ifa, NULL, RTF_HOST, wid_dst);

	if (nh->nh_flags & NHF_GATEWAY) {
		const char *cp;

		cp = fmt_sockaddr(sa_gw, NULL, RTF_HOST);
		strlcpy(gw_addr, cp, sizeof(gw_addr));
	} else
		snprintf(gw_addr, sizeof(gw_addr), "%s/resolve", iface_name);
	/* Shrink this column by whatever the previous one overran. */
	protrusion = print_addr("gateway", gw_addr, wid_dst - protrusion);

	nhop_map_update(&global_nhop_map, nh->nh_idx, gw_addr, iface_name);

	snprintf(buffer, sizeof(buffer), "{[:-%d}{:flags/%%s}{]:} ",
	    wid_flags - protrusion);

	/*
	 * NOTE(review): the flags column shows the routing-message flags
	 * decoded with rt_bits, not nh->nh_flags decoded with nh_bits --
	 * confirm this is intended.
	 */
	print_flags_generic(rtm->rtm_flags, rt_bits, buffer, "rt_flags_pretty");

	if (Wflag) {
		xo_emit("{t:use/%*lu} ", wid_pksent, nh->nh_pksent);
		xo_emit("{t:mtu/%*lu} ", wid_mtu, nh->nh_mtu);
	}

	if (Wflag)
		xo_emit("{t:interface-name/%*s}", wid_if, iface_name);
	else
		xo_emit("{t:interface-name/%*.*s}", wid_if, wid_if, iface_name);

	/* Address interface is shown only when it differs from ifindex. */
	memset(iface_name, 0, sizeof(iface_name));
	if (nh->aifindex < (uint32_t)ifmap_size && nh->ifindex != nh->aifindex) {
		strlcpy(iface_name, ifmap[nh->aifindex].ifname,
		    sizeof(iface_name));
		if (*iface_name == '\0')
			strlcpy(iface_name, "---", sizeof(iface_name));
	}
	if (Wflag)
		xo_emit("{t:address-interface-name/%*s}", wid_if, iface_name);

	xo_emit("{t:refcount/%*lu} ", wid_refcnt, nh->nh_refcount);
	if (Wflag && nh->prepend_len) {
		/* Hard-coded placeholder; the real prepend bytes are not decoded. */
		char *prepend_hex = "AABBCCDDEE";

		xo_emit(" {:nhop-prepend/%*s}", wid_prepend, prepend_hex);
	}

	xo_emit("\n");
	xo_close_instance(name);
}
/*
 * Sort record used while ordering nexthop messages: the nexthop index
 * and the routing message it was read from.
 */
struct nhops_map {
	uint32_t		idx;
	struct rt_msghdr	*rtm;
};

/*
 * qsort(3) comparator ordering struct nhops_map records by ascending
 * nexthop index.  Returns -1, 0 or 1.
 */
static int
cmp_nh_idx(const void *_a, const void *_b)
{
	const struct nhops_map *lhs = _a;
	const struct nhops_map *rhs = _b;

	/* Branch-free three-way compare; avoids unsigned subtraction wrap. */
	return ((lhs->idx > rhs->idx) - (lhs->idx < rhs->idx));
}
static void
print_nhops_sysctl(int fibnum, int af)
{
size_t needed;
int mib[7];
char *buf, *next, *lim;
struct rt_msghdr *rtm;
struct nhop_external *nh;
int fam;
struct nhops_map *nh_map;
size_t nh_count, nh_size;
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0;
mib[3] = af;
mib[4] = NET_RT_NHOP;
mib[5] = 0;
mib[6] = fibnum;
if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
err(EX_OSERR, "sysctl: net.route.0.%d.nhdump.%d estimate", af,
fibnum);
if ((buf = malloc(needed)) == NULL)
errx(2, "malloc(%lu)", (unsigned long)needed);
if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0)
err(1, "sysctl: net.route.0.%d.nhdump.%d", af, fibnum);
lim = buf + needed;
xo_open_container("nhop-table");
xo_open_list("rt-family");
/*
* nexhops are received unsorted. Collect everything first, sort and then display
* sorted.
*/
nh_count = 0;
nh_size = 16;
nh_map = calloc(nh_size, sizeof(struct nhops_map));
for (next = buf; next < lim; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr *)next;
if (rtm->rtm_version != RTM_VERSION)
continue;
if (nh_count >= nh_size) {
nh_size *= 2;
nh_map = realloc(nh_map, nh_size * sizeof(struct nhops_map));
}
nh = (struct nhop_external *)(rtm + 1);
nh_map[nh_count].idx = nh->nh_idx;
nh_map[nh_count].rtm = rtm;
nh_count++;
}
if (nh_count > 0) {
qsort(nh_map, nh_count, sizeof(struct nhops_map), cmp_nh_idx);
nh = (struct nhop_external *)(nh_map[0].rtm + 1);
fam = nh->nh_family;
wid_dst = WID_GW_DEFAULT(fam);
wid_gw = WID_GW_DEFAULT(fam);
wid_nhidx = 5;
wid_nhtype = 12;
wid_refcnt = 6;
wid_flags = 6;
wid_pksent = 8;
wid_mtu = 6;
wid_if = WID_IF_DEFAULT(fam);
xo_open_instance("rt-family");
pr_family(fam);
xo_open_list("nh-entry");
print_nhop_header(fam);
for (size_t i = 0; i < nh_count; i++) {
rtm = nh_map[i].rtm;
nh = (struct nhop_external *)(rtm + 1);
print_nhop_entry_sysctl("nh-entry", rtm, nh);
}
xo_close_list("nh-entry");
xo_close_instance("rt-family");
}
xo_close_list("rt-family");
xo_close_container("nhop-table");
free(buf);
}
/*
 * Emits nexthop flag word @f: first in compact one-letter form through
 * caller-supplied xo format @format, then as the "nh_flags_pretty" list
 * holding the long name of every flag from nh_bits set in @f.
 */
static void
p_nhflags(int f, const char *format)
{
	const char *list_name = "nh_flags_pretty";
	struct bits *bit;

	xo_emit(format, fmt_flags(nh_bits, f));

	xo_open_list(list_name);
	bit = nh_bits;
	while (bit->b_mask) {
		if (bit->b_mask & f)
			xo_emit("{le:nh_flags_pretty/%s}", bit->b_name);
		bit++;
	}
	xo_close_list(list_name);
}
/*
 * Entry point for printing nexthop objects.
 *
 * @fibnum selects the fib; -1 means the fib reported by the
 * "net.my_fibnum" sysctl (0 if that cannot be read).  The value is
 * validated against the "net.fibs" sysctl (assumed 1 if unreadable)
 * and the program exits with EX_USAGE when it is out of range.
 * @af is passed through to the nexthop sysctl dump.
 */
void
nhops_print(int fibnum, int af)
{
	size_t len = sizeof(int);
	int fibcount;

	if (fibnum == -1) {
		if (sysctlbyname("net.my_fibnum", &fibnum, &len, NULL, 0) == -1)
			fibnum = 0;
	}
	if (sysctlbyname("net.fibs", &fibcount, &len, NULL, 0) == -1)
		fibcount = 1;
	if (fibnum < 0 || fibnum >= fibcount)
		errx(EX_USAGE, "%d: invalid fib", fibnum);

	/* Interface index -> name table used while printing entries. */
	ifmap = prepare_ifmap(&ifmap_size);

	xo_open_container("route-nhop-information");
	xo_emit("{T:Nexthop data}");
	if (fibnum != 0)
		xo_emit(" ({L:fib}: {:fib/%d})", fibnum);
	xo_emit("\n");
	print_nhops_sysctl(fibnum, af);
	xo_close_container("route-nhop-information");
}

View file

@ -69,16 +69,13 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <libxo/xo.h>
#include "netstat.h"
#include "common.h"
#include "nl_defs.h"
/*
* Definitions for showing gateway flags.
*/
static struct bits {
u_long b_mask;
char b_val;
const char *b_name;
} bits[] = {
struct bits rt_bits[] = {
{ RTF_UP, 'U', "up" },
{ RTF_GATEWAY, 'G', "gateway" },
{ RTF_HOST, 'H', "host" },
@ -99,11 +96,8 @@ static struct bits {
{ 0 , 0, NULL }
};
struct ifmap_entry {
char ifname[IFNAMSIZ];
};
static struct ifmap_entry *ifmap;
static int ifmap_size;
static size_t ifmap_size;
static struct timespec uptime;
static const char *netname4(in_addr_t, in_addr_t);
@ -112,12 +106,7 @@ static const char *netname6(struct sockaddr_in6 *, struct sockaddr_in6 *);
#endif
static void p_rtable_sysctl(int, int);
static void p_rtentry_sysctl(const char *name, struct rt_msghdr *);
static int p_sockaddr(const char *name, struct sockaddr *, struct sockaddr *,
int, int);
static const char *fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask,
int flags);
static void p_flags(int, const char *);
static const char *fmt_flags(int f);
static void domask(char *, size_t, u_long);
@ -229,7 +218,7 @@ pr_rthdr(int af1 __unused)
wid_dst, wid_dst, "Destination",
wid_gw, wid_gw, "Gateway",
wid_flags, wid_flags, "Flags",
wid_pksent, wid_pksent, "Use",
wid_mtu, wid_mtu, "Nhop#",
wid_mtu, wid_mtu, "Mtu",
wid_if, wid_if, "Netif",
wid_expire, "Expire");
@ -252,46 +241,10 @@ p_rtable_sysctl(int fibnum, int af)
char *buf, *next, *lim;
struct rt_msghdr *rtm;
struct sockaddr *sa;
int fam = AF_UNSPEC, ifindex = 0, size;
int fam = AF_UNSPEC;
int need_table_close = false;
struct ifaddrs *ifap, *ifa;
struct sockaddr_dl *sdl;
/*
* Retrieve interface list at first
* since we need #ifindex -> if_xname match
*/
if (getifaddrs(&ifap) != 0)
err(EX_OSERR, "getifaddrs");
for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
if (ifa->ifa_addr->sa_family != AF_LINK)
continue;
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
ifindex = sdl->sdl_index;
if (ifindex >= ifmap_size) {
size = roundup(ifindex + 1, 32) *
sizeof(struct ifmap_entry);
if ((ifmap = realloc(ifmap, size)) == NULL)
errx(2, "realloc(%d) failed", size);
memset(&ifmap[ifmap_size], 0,
size - ifmap_size *
sizeof(struct ifmap_entry));
ifmap_size = roundup(ifindex + 1, 32);
}
if (*ifmap[ifindex].ifname != '\0')
continue;
strlcpy(ifmap[ifindex].ifname, ifa->ifa_name, IFNAMSIZ);
}
freeifaddrs(ifap);
ifmap = prepare_ifmap(&ifmap_size);
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
@ -377,7 +330,8 @@ p_rtentry_sysctl(const char *name, struct rt_msghdr *rtm)
wid_flags - protrusion);
p_flags(rtm->rtm_flags, buffer);
if (Wflag) {
xo_emit("{t:use/%*lu} ", wid_pksent, rtm->rtm_rmx.rmx_pksent);
/* XXX: use=0? */
xo_emit("{t:nhop/%*lu} ", wid_mtu, rtm->rtm_rmx.rmx_nhidx);
if (rtm->rtm_rmx.rmx_mtu != 0)
xo_emit("{t:mtu/%*lu} ", wid_mtu, rtm->rtm_rmx.rmx_mtu);
@ -410,7 +364,7 @@ p_rtentry_sysctl(const char *name, struct rt_msghdr *rtm)
xo_close_instance(name);
}
static int
int
p_sockaddr(const char *name, struct sockaddr *sa, struct sockaddr *mask,
int flags, int width)
{
@ -442,7 +396,7 @@ p_sockaddr(const char *name, struct sockaddr *sa, struct sockaddr *mask,
return (protrusion);
}
static const char *
const char *
fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask, int flags)
{
static char buf[128];
@ -519,30 +473,10 @@ fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask, int flags)
static void
p_flags(int f, const char *format)
{
struct bits *p;
xo_emit(format, fmt_flags(f));
xo_open_list("flags_pretty");
for (p = bits; p->b_mask; p++)
if (p->b_mask & f)
xo_emit("{le:flags_pretty/%s}", p->b_name);
xo_close_list("flags_pretty");
print_flags_generic(f, rt_bits, format, "flags_pretty");
}
static const char *
fmt_flags(int f)
{
static char name[33];
char *flags;
struct bits *p = bits;
for (flags = name; p->b_mask; p++)
if (p->b_mask & f)
*flags++ = p->b_val;
*flags = '\0';
return (name);
}
char *
routename(struct sockaddr *sa, int flags)