- change the interface to flowtable_lookup so that we don't rely on
  the mbuf for obtaining the fib index (a caller-side sketch follows
  this list)
 - check that a cached flow corresponds to the same fib index as the
   packet for which we are doing the lookup
 - at interface detach time flush any flows referencing stale rtentrys
   associated with the interface that is going away (fixes reported
   panics)
 - reduce the time between cleans so that, if the cleaner is running
   at the time the eventhandler is called and the wakeup is missed,
   less time elapses before the eventhandler returns
 - separate per-vnet initialization from global initialization
   (pointed out by jeli@)
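
A caller-side sketch of the interface change (a kernel fragment for
illustration only; it assumes the usual ip_output()-style context with
an mbuf m in hand, and is not part of the diff below):

	struct route ro;
	uint32_t fibnum;

	bzero(&ro, sizeof(ro));
	/*
	 * The caller now derives the fib index itself (e.g. from the
	 * mbuf) and passes it explicitly; flowtable_lookup() no longer
	 * reaches into the mbuf for it.
	 */
	fibnum = M_GETFIB(m);
	if (flowtable_lookup(V_ip_ft, m, &ro, fibnum) == 0) {
		/*
		 * Cache hit: ro.ro_rt/ro.ro_lle are usable and the cached
		 * flow is known to belong to the same fib as the packet.
		 */
	}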

Reviewed by:	sam@
Approved by:	re@
Kip Macy 2009-08-18 20:28:58 +00:00
parent 44d9075392
commit 3ee42584f9
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=196368
3 changed files with 196 additions and 43 deletions

@@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "opt_route.h"
#include "opt_mpath.h"
#include "opt_ddb.h"
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -36,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/bitstring.h>
#include <sys/condvar.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
@@ -66,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp.h>
#include <libkern/jenkins.h>
#include <ddb/ddb.h>
struct ipv4_tuple {
uint16_t ip_sport; /* source port */
@@ -94,8 +97,9 @@ union ipv6_flow {
struct flentry {
volatile uint32_t f_fhash; /* hash flowing forward */
uint16_t f_flags; /* flow flags */
uint8_t f_pad; /* alignment */
uint8_t f_pad;
uint8_t f_proto; /* protocol */
uint32_t f_fibnum; /* fib index */
uint32_t f_uptime; /* uptime at last access */
struct flentry *f_next; /* pointer to collision entry */
volatile struct rtentry *f_rt; /* rtentry for flow */
@@ -173,6 +177,10 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
#define V_flow_ipv4_zone VNET(flow_ipv4_zone)
#define V_flow_ipv6_zone VNET(flow_ipv6_zone)
static struct cv flowclean_cv;
static struct mtx flowclean_lock;
static uint32_t flowclean_cycles;
/*
* TODO:
* - Make flowtable stats per-cpu, aggregated at sysctl call time,
@@ -288,10 +296,10 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
#ifndef RADIX_MPATH
static void
in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fib)
in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
{
rtalloc_ign_fib(ro, 0, fib);
rtalloc_ign_fib(ro, 0, fibnum);
}
#endif
@@ -425,7 +433,7 @@ static bitstr_t *
flowtable_mask(struct flowtable *ft)
{
bitstr_t *mask;
if (ft->ft_flags & FL_PCPU)
mask = ft->ft_masks[curcpu];
else
@@ -501,7 +509,7 @@ flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
static int
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
uint8_t proto, struct route *ro, uint16_t flags)
uint8_t proto, uint32_t fibnum, struct route *ro, uint16_t flags)
{
struct flentry *fle, *fletail, *newfle, **flep;
int depth;
@@ -564,6 +572,7 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
fle->f_rt = ro->ro_rt;
fle->f_lle = ro->ro_lle;
fle->f_fhash = hash;
fle->f_fibnum = fibnum;
fle->f_uptime = time_uptime;
FL_ENTRY_UNLOCK(ft, hash);
return (0);
@@ -591,13 +600,13 @@ flowtable_key_equal(struct flentry *fle, uint32_t *key)
}
int
flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro, uint32_t fibnum)
{
uint32_t key[9], hash;
struct flentry *fle;
uint16_t flags;
uint8_t proto = 0;
int error = 0, fib = 0;
int error = 0;
struct rtentry *rt;
struct llentry *lle;
@@ -640,6 +649,7 @@ flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
&& fle->f_fhash == hash
&& flowtable_key_equal(fle, key)
&& (proto == fle->f_proto)
&& (fibnum == fle->f_fibnum)
&& (rt->rt_flags & RTF_UP)
&& (rt->rt_ifp != NULL)) {
V_flowtable_hits++;
@@ -668,10 +678,8 @@ flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
* of arpresolve with an rt_check variant that expected to
* receive the route locked
*/
if (m != NULL)
fib = M_GETFIB(m);
ft->ft_rtalloc(ro, hash, fib);
ft->ft_rtalloc(ro, hash, fibnum);
if (ro->ro_rt == NULL)
error = ENETUNREACH;
else {
@@ -692,7 +700,7 @@ flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
ro->ro_rt = NULL;
return (ENOENT);
}
error = flowtable_insert(ft, hash, key, proto,
error = flowtable_insert(ft, hash, key, proto, fibnum,
ro, flags);
if (error) {
@@ -791,35 +799,6 @@ flowtable_alloc(int nentry, int flags)
return (ft);
}
static void
flowtable_init(const void *unused __unused)
{
V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
V_flowtable_ready = 1;
}
VNET_SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
flowtable_init, NULL);
#ifdef VIMAGE
static void
flowtable_uninit(const void *unused __unused)
{
uma_zdestroy(V_flow_ipv4_zone);
uma_zdestroy(V_flow_ipv6_zone);
}
VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
flowtable_uninit, NULL);
#endif
/*
* The rest of the code is devoted to garbage collection of expired entries.
* It is a new additon made necessary by the switch to dynamically allocating
@@ -973,14 +952,32 @@ flowtable_cleaner(void)
}
VNET_LIST_RUNLOCK();
flowclean_cycles++;
/*
* The 20 second interval between cleaning checks
* is arbitrary
*/
pause("flowcleanwait", 20*hz);
mtx_lock(&flowclean_lock);
cv_broadcast(&flowclean_cv);
cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz);
mtx_unlock(&flowclean_lock);
}
}
static void
flowtable_flush(void *unused __unused)
{
uint64_t start;
mtx_lock(&flowclean_lock);
start = flowclean_cycles;
while (start == flowclean_cycles) {
cv_broadcast(&flowclean_cv);
cv_wait(&flowclean_cv, &flowclean_lock);
}
mtx_unlock(&flowclean_lock);
}
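
The flush path above is a condvar rendezvous: the ifnet departure
eventhandler broadcasts to kick the cleaner and then sleeps until
flowclean_cycles advances, i.e. until the cleaner has completed a full
pass since the flush request, so no flow can still hold a reference to
the departing interface's rtentrys.  A userland analogue of the same
handshake, using pthreads purely for illustration (the kernel code
uses cv(9)/mtx(9); clean_lock/clean_cv/clean_cycles stand in for the
flowclean_* objects):

	#include <pthread.h>
	#include <stdint.h>
	#include <time.h>

	static pthread_mutex_t clean_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  clean_cv   = PTHREAD_COND_INITIALIZER;
	static uint32_t clean_cycles;

	/*
	 * Cleaner: one pass, bump the cycle count, wake any waiters, then
	 * sleep (bounded) until the next pass is due or a flush request
	 * arrives early.
	 */
	static void *
	cleaner(void *arg)
	{
		struct timespec ts;

		(void)arg;
		for (;;) {
			/* ... free expired/stale flow entries here ... */
			pthread_mutex_lock(&clean_lock);
			clean_cycles++;
			pthread_cond_broadcast(&clean_cv);
			clock_gettime(CLOCK_REALTIME, &ts);
			ts.tv_sec += 10;	/* roughly the kernel's 10*hz */
			pthread_cond_timedwait(&clean_cv, &clean_lock, &ts);
			pthread_mutex_unlock(&clean_lock);
		}
		return (NULL);
	}

	/*
	 * Flush: keep kicking the cleaner and wait until the cycle count
	 * advances, i.e. a pass has finished since we started waiting.
	 */
	static void
	flush_flows(void)
	{
		uint32_t start;

		pthread_mutex_lock(&clean_lock);
		start = clean_cycles;
		while (start == clean_cycles) {
			pthread_cond_broadcast(&clean_cv);
			pthread_cond_wait(&clean_cv, &clean_lock);
		}
		pthread_mutex_unlock(&clean_lock);
	}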
static struct kproc_desc flow_kp = {
"flowcleaner",
flowtable_cleaner,
@@ -988,3 +985,159 @@ static struct kproc_desc flow_kp = {
};
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
static void
flowtable_init_vnet(const void *unused __unused)
{
V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
}
VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE,
flowtable_init_vnet, NULL);
static void
flowtable_init(const void *unused __unused)
{
cv_init(&flowclean_cv, "flowcleanwait");
mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
EVENTHANDLER_PRI_ANY);
V_flowtable_ready = 1;
}
SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
flowtable_init, NULL);
#ifdef VIMAGE
static void
flowtable_uninit(const void *unused __unused)
{
uma_zdestroy(V_flow_ipv4_zone);
uma_zdestroy(V_flow_ipv6_zone);
}
VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
flowtable_uninit, NULL);
#endif
#ifdef DDB
static bitstr_t *
flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
{
bitstr_t *mask;
if (ft->ft_flags & FL_PCPU)
mask = ft->ft_masks[cpuid];
else
mask = ft->ft_masks[0];
return (mask);
}
static struct flentry **
flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
{
struct flentry **fle;
int index = (hash % ft->ft_size);
if (ft->ft_flags & FL_PCPU) {
fle = &ft->ft_table.pcpu[cpuid][index];
} else {
fle = &ft->ft_table.global[index];
}
return (fle);
}
static void
flow_show(struct flowtable *ft, struct flentry *fle)
{
int idle_time;
int rt_valid;
idle_time = (int)(time_uptime - fle->f_uptime);
rt_valid = fle->f_rt != NULL;
db_printf("hash=0x%08x idle_time=%03d rt=%p ifp=%p",
fle->f_fhash, idle_time,
fle->f_rt, rt_valid ? fle->f_rt->rt_ifp : NULL);
if (rt_valid && (fle->f_rt->rt_flags & RTF_UP))
db_printf(" RTF_UP ");
if (fle->f_flags & FL_STALE)
db_printf(" FL_STALE ");
db_printf("\n");
}
static void
flowtable_show(struct flowtable *ft, int cpuid)
{
int curbit = 0;
struct flentry *fle, **flehead;
bitstr_t *mask, *tmpmask;
db_printf("cpu: %d\n", cpuid);
mask = flowtable_mask_pcpu(ft, cpuid);
tmpmask = ft->ft_tmpmask;
memcpy(tmpmask, mask, ft->ft_size/8);
/*
* XXX Note to self, bit_ffs operates at the byte level
* and thus adds gratuitous overhead
*/
bit_ffs(tmpmask, ft->ft_size, &curbit);
while (curbit != -1) {
if (curbit >= ft->ft_size || curbit < -1) {
db_printf("warning: bad curbit value %d \n",
curbit);
break;
}
flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
fle = *flehead;
while (fle != NULL) {
flow_show(ft, fle);
fle = fle->f_next;
continue;
}
bit_clear(tmpmask, curbit);
bit_ffs(tmpmask, ft->ft_size, &curbit);
}
}
static void
flowtable_show_vnet(void)
{
struct flowtable *ft;
int i;
ft = V_flow_list_head;
while (ft != NULL) {
if (ft->ft_flags & FL_PCPU) {
for (i = 0; i <= mp_maxid; i++) {
if (CPU_ABSENT(i))
continue;
flowtable_show(ft, i);
}
} else {
flowtable_show(ft, 0);
}
ft = ft->ft_next;
}
}
DB_SHOW_COMMAND(flowtables, db_show_flowtables)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
flowtable_show_vnet();
CURVNET_RESTORE();
}
}
#endif
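
With options DDB compiled in, DB_SHOW_COMMAND(flowtables, ...) registers
the new hook under the debugger's "show" table, so the tables can be
inspected from the db> prompt, presumably along the lines of:

	db> show flowtables
	cpu: 0
	hash=0x9b1f04a2 idle_time=017 rt=0xc3a1b000 ifp=0xc39f0800 RTF_UP

(the output format follows flow_show() above; the values here are made
up for illustration).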

@@ -49,7 +49,7 @@ struct flowtable *flowtable_alloc(int nentry, int flags);
*
*/
int flowtable_lookup(struct flowtable *ft, struct mbuf *m,
struct route *ro);
struct route *ro, uint32_t fibnum);
#endif /* _KERNEL */
#endif

@@ -157,7 +157,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
if (flowtable_lookup(V_ip_ft, m, ro) == 0)
if (flowtable_lookup(V_ip_ft, m, ro, M_GETFIB(m)) == 0)
nortfree = 1;
#endif
}