Added fast IP forwarding code by Matt Thomas <matt@3am-software.com> via

NetBSD, ported to FreeBSD by Pierre Beyssac <pb@fasterix.freenix.org> and
minorly tweaked by me.
This is a standard part of FreeBSD, but must be enabled with:
"sysctl -w net.inet.ip.fastforwarding=1" ...and of course forwarding must
also be enabled. This should probably be modified to use the zone
allocator for speed and space efficiency. The current algorithm also
appears to lose if the number of active paths exceeds IPFLOW_MAX (256),
in which case it wastes lots of time trying to figure out which cache
entry to drop.
This commit is contained in:
David Greenman 1998-05-19 14:04:36 +00:00
parent d04c1186ec
commit 1f91d8c563
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=36192
10 changed files with 380 additions and 11 deletions

View file

@ -268,6 +268,7 @@ netinet/in_rmx.c optional inet
netinet/ip_auth.c optional ipfilter inet
netinet/ip_divert.c optional ipdivert
netinet/ip_fil.c optional ipfilter inet
netinet/ip_flow.c optional inet
netinet/ip_frag.c optional ipfilter inet
netinet/ip_fw.c optional ipfirewall
netinet/ip_icmp.c optional inet

View file

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93
* $Id: if_ethersubr.c,v 1.46 1998/03/18 01:40:11 wollman Exp $
* $Id: if_ethersubr.c,v 1.47 1998/03/30 09:51:39 phk Exp $
*/
#include "opt_atalk.h"
@ -501,6 +501,8 @@ ether_input(ifp, eh, m)
switch (ether_type) {
#ifdef INET
case ETHERTYPE_IP:
if (ipflow_fastforward(m))
return;
schednetisr(NETISR_IP);
inq = &ipintrq;
break;

View file

@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
* $Id: if_fddisubr.c,v 1.26 1998/02/20 13:11:49 bde Exp $
* $Id: if_fddisubr.c,v 1.27 1998/03/30 09:51:44 phk Exp $
*/
#include "opt_atalk.h"
@ -533,6 +533,8 @@ fddi_input(ifp, fh, m)
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
if (ipflow_fastforward(m))
return;
schednetisr(NETISR_IP);
inq = &ipintrq;
break;

View file

@ -69,7 +69,7 @@
* Paul Mackerras (paulus@cs.anu.edu.au).
*/
/* $Id: if_ppp.c,v 1.55 1998/03/30 09:51:52 phk Exp $ */
/* $Id: if_ppp.c,v 1.56 1998/04/06 11:43:10 phk Exp $ */
/* from if_sl.c,v 1.11 84/10/04 12:54:47 rick Exp */
/* from NetBSD: if_ppp.c,v 1.15.2.2 1994/07/28 05:17:58 cgd Exp */
@ -1488,6 +1488,10 @@ ppp_inproc(sc, m)
m->m_pkthdr.len -= PPP_HDRLEN;
m->m_data += PPP_HDRLEN;
m->m_len -= PPP_HDRLEN;
if (ipflow_fastforward(m)) {
sc->sc_last_recv = time_second;
return;
}
schednetisr(NETISR_IP);
inq = &ipintrq;
sc->sc_last_recv = time_second; /* update time of last pkt rcvd */

View file

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)in.h 8.3 (Berkeley) 1/3/94
* $Id: in.h,v 1.31 1998/04/19 17:22:27 phk Exp $
* $Id: in.h,v 1.32 1998/05/10 20:51:46 jb Exp $
*/
#ifndef _NETINET_IN_H_
@ -398,7 +398,8 @@ struct ip_mreq {
#define IPCTL_INTRQDROPS 11 /* number of netisr q drops */
#define IPCTL_STATS 12 /* ipstat structure */
#define IPCTL_ACCEPTSOURCEROUTE 13 /* may accept source routed packets */
#define IPCTL_MAXID 14
#define IPCTL_FASTFORWARDING 14 /* use fast IP forwarding code */
#define IPCTL_MAXID 15
#define IPCTL_NAMES { \
{ 0, 0 }, \
@ -415,6 +416,7 @@ struct ip_mreq {
{ "intr-queue-drops", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
{ "accept_sourceroute", CTLTYPE_INT }, \
{ "fastforwarding", CTLTYPE_INT }, \
}

View file

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)in_var.h 8.2 (Berkeley) 1/9/95
* $Id: in_var.h,v 1.26 1997/04/27 20:01:06 wollman Exp $
* $Id: in_var.h,v 1.27 1997/09/07 05:26:43 bde Exp $
*/
#ifndef _NETINET_IN_VAR_H_
@ -211,6 +211,7 @@ do { \
IN_NEXT_MULTI((step), (inm)); \
} while(0)
struct route;
struct in_multi *in_addmulti __P((struct in_addr *, struct ifnet *));
void in_delmulti __P((struct in_multi *));
int in_control __P((struct socket *, int, caddr_t, struct ifnet *,
@ -219,6 +220,9 @@ void in_rtqdrain __P((void));
void ip_input __P((struct mbuf *));
int in_ifadown __P((struct ifaddr *ifa));
void in_ifscrub __P((struct ifnet *, struct in_ifaddr *));
int ipflow_fastforward __P((struct mbuf *));
void ipflow_create __P((const struct route *, struct mbuf *));
void ipflow_slowtimo __P((void));
#endif /* KERNEL */

332
sys/netinet/ip_flow.c Normal file
View file

@ -0,0 +1,332 @@
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by the 3am Software Foundry ("3am"). It was developed by Matt Thomas.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
/*
 * Lifetime loaded into ipf_timer whenever a flow is created or used to
 * forward a packet; ipflow_slowtimo() decrements it once per call and
 * frees the entry when it reaches zero.
 * NOTE(review): presumably about five seconds' worth of slow-timeout
 * ticks -- PR_SLOWHZ is defined elsewhere, confirm.
 */
#define IPFLOW_TIMER (5 * PR_SLOWHZ)
/* Number of buckets in the flow hash table. */
#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS)
/* The flow cache: one list of ipflow entries per hash bucket. */
static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
/* Number of ipflow entries currently allocated. */
static int ipflow_inuse;
/*
 * Upper bound on allocated entries; once ipflow_inuse reaches it,
 * ipflow_create() recycles an entry via ipflow_reap() instead of
 * allocating a new one.
 */
#define IPFLOW_MAX 256
/*
 * Run-time switch for the fast forwarding path (off by default),
 * exported read/write through the sysctl declared below.
 */
static int ipflow_active = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
&ipflow_active, 0, "");
/* malloc type under which ipflow entries are allocated and freed. */
MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
/*
 * Map a (destination, source, type-of-service) triple to a bucket index
 * in the range [0, IPFLOW_HASHSIZE).
 *
 * The two addresses are folded IPFLOW_HASHBITS bits at a time, the
 * destination from the top and the source from the bottom, and the sum
 * is masked down to the table size.  ipflow_lookup() and ipflow_create()
 * both go through this function, so the exact mixing only has to be
 * self-consistent.
 */
static unsigned
ipflow_hash(
	struct in_addr dst,
	struct in_addr src,
	unsigned tos)
{
	unsigned hash = tos;
	int idx;

	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) {
		/*
		 * On the first pass the destination shift count would be
		 * 32, i.e. the full width of the address, which C leaves
		 * undefined.  Shifting every bit out contributes nothing,
		 * so skip the term instead of evaluating it.
		 */
		if (idx != 0)
			hash += dst.s_addr >> (32 - idx);
		hash += src.s_addr >> idx;
	}
	return hash & (IPFLOW_HASHSIZE-1);
}
/*
 * Find the cached flow matching the destination, source and TOS of the
 * given IP header.  Returns the entry, or NULL when the bucket holds no
 * match.
 */
static struct ipflow *
ipflow_lookup(
	const struct ip *ip)
{
	struct ipflow *cur;
	unsigned bucket;

	bucket = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);

	for (cur = LIST_FIRST(&ipflows[bucket]); cur != NULL;
	    cur = LIST_NEXT(cur, ipf_next)) {
		if (ip->ip_dst.s_addr != cur->ipf_dst.s_addr)
			continue;
		if (ip->ip_src.s_addr != cur->ipf_src.s_addr)
			continue;
		if (ip->ip_tos != cur->ipf_tos)
			continue;
		return cur;
	}
	return NULL;
}
int
ipflow_fastforward(
struct mbuf *m)
{
struct ip *ip;
struct ipflow *ipf;
struct rtentry *rt;
u_int32_t sum;
int error;
/*
* Are we forwarding packets? Big enough for an IP packet?
*/
if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
return 0;
/*
* IP header with no option and valid version and length
*/
ip = mtod(m, struct ip *);
if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
|| ntohs(ip->ip_len) > m->m_pkthdr.len)
return 0;
/*
* Find a flow.
*/
if ((ipf = ipflow_lookup(ip)) == NULL)
return 0;
/*
* Route and interface still up?
*/
rt = ipf->ipf_ro.ro_rt;
if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
return 0;
/*
* Packet size OK? TTL?
*/
if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
return 0;
/*
* Everything checks out and so we can forward this packet.
* Modify the TTL and incrementally change the checksum.
*/
ip->ip_ttl -= IPTTLDEC;
if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
ip->ip_sum += htons(IPTTLDEC << 8) + 1;
} else {
ip->ip_sum += htons(IPTTLDEC << 8);
}
/*
* Send the packet on its way. All we can get back is ENOBUFS
*/
ipf->ipf_uses++;
ipf->ipf_timer = IPFLOW_TIMER;
if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, &ipf->ipf_ro.ro_dst, rt)) != 0) {
if (error == ENOBUFS)
ipf->ipf_dropped++;
else
ipf->ipf_errors++;
}
return 1;
}
static void
ipflow_addstats(
struct ipflow *ipf)
{
ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
ipstat.ips_forward += ipf->ipf_uses;
ipstat.ips_fastforward += ipf->ipf_uses;
}
/*
 * Destroy one flow entry: unlink it from its bucket, account for its
 * statistics, drop its route reference and release its memory.
 */
static void
ipflow_free(
	struct ipflow *ipf)
{
	int saved_ipl;

	/*
	 * Only the unlink itself runs at raised IPL; once the entry is
	 * off its list the remaining teardown is done at the caller's
	 * level.
	 */
	saved_ipl = splimp();
	LIST_REMOVE(ipf, ipf_next);
	splx(saved_ipl);

	ipflow_addstats(ipf);
	RTFREE(ipf->ipf_ro.ro_rt);
	ipflow_inuse--;
	FREE(ipf, M_IPFLOW);
}
/*
 * Pick a flow entry to recycle and detach it from the table.
 *
 * An entry whose route is no longer up is taken as soon as it is seen.
 * Otherwise the whole table is scanned for the entry with the smallest
 * remaining timer, ties being broken by the fewest uses over the
 * current and previous periods.
 *
 * The chosen entry is unlinked, its statistics are accounted and its
 * route reference is released.  The memory is NOT freed and
 * ipflow_inuse is not changed: the entry is returned for reuse.
 *
 * NOTE: the table must not be empty; the only caller invokes this when
 * ipflow_inuse has reached IPFLOW_MAX.
 */
static struct ipflow *
ipflow_reap(
	void)
{
	struct ipflow *ipf, *maybe_ipf = NULL;
	int idx;
	int s;

	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
		ipf = LIST_FIRST(&ipflows[idx]);
		while (ipf != NULL) {
			/*
			 * If this no longer points to a valid route
			 * reclaim it.
			 */
			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
				goto done;
			/*
			 * choose the one that's been least recently used
			 * or has had the least uses in the last 1.5
			 * intervals.  The first entry seen becomes the
			 * initial candidate, so maybe_ipf must be tested
			 * for NULL before it is dereferenced.
			 */
			if (maybe_ipf == NULL
			    || ipf->ipf_timer < maybe_ipf->ipf_timer
			    || (ipf->ipf_timer == maybe_ipf->ipf_timer
				&& ipf->ipf_last_uses + ipf->ipf_uses <
				    maybe_ipf->ipf_last_uses +
				    maybe_ipf->ipf_uses))
				maybe_ipf = ipf;
			ipf = LIST_NEXT(ipf, ipf_next);
		}
	}
	ipf = maybe_ipf;
done:
	/*
	 * Remove the entry from the flow table.
	 */
	s = splimp();
	LIST_REMOVE(ipf, ipf_next);
	splx(s);
	ipflow_addstats(ipf);
	RTFREE(ipf->ipf_ro.ro_rt);
	return ipf;
}
void
ipflow_slowtimo(
void)
{
struct ipflow *ipf;
int idx;
for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
ipf = LIST_FIRST(&ipflows[idx]);
while (ipf != NULL) {
struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
if (--ipf->ipf_timer == 0) {
ipflow_free(ipf);
} else {
ipf->ipf_last_uses = ipf->ipf_uses;
ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
ipstat.ips_forward += ipf->ipf_uses;
ipstat.ips_fastforward += ipf->ipf_uses;
ipf->ipf_uses = 0;
}
ipf = next_ipf;
}
}
}
void
ipflow_create(
const struct route *ro,
struct mbuf *m)
{
const struct ip *const ip = mtod(m, struct ip *);
struct ipflow *ipf;
unsigned hash;
int s;
/*
* Don't create cache entries for ICMP messages.
*/
if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
return;
/*
* See if an existing flow struct exists. If so remove it from it's
* list and free the old route. If not, try to malloc a new one
* (if we aren't at our limit).
*/
ipf = ipflow_lookup(ip);
if (ipf == NULL) {
if (ipflow_inuse == IPFLOW_MAX) {
ipf = ipflow_reap();
} else {
ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
M_NOWAIT);
if (ipf == NULL)
return;
ipflow_inuse++;
}
bzero((caddr_t) ipf, sizeof(*ipf));
} else {
s = splimp();
LIST_REMOVE(ipf, ipf_next);
splx(s);
ipflow_addstats(ipf);
RTFREE(ipf->ipf_ro.ro_rt);
ipf->ipf_uses = ipf->ipf_last_uses = 0;
ipf->ipf_errors = ipf->ipf_dropped = 0;
}
/*
* Fill in the updated information.
*/
ipf->ipf_ro = *ro;
ro->ro_rt->rt_refcnt++;
ipf->ipf_dst = ip->ip_dst;
ipf->ipf_src = ip->ip_src;
ipf->ipf_tos = ip->ip_tos;
ipf->ipf_timer = IPFLOW_TIMER;
/*
* Insert into the approriate bucket of the flow table.
*/
hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
s = splimp();
LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
splx(s);
}

View file

@ -12,7 +12,7 @@
*
* This software is provided ``AS IS'' without any warranties of any kind.
*
* $Id: ip_fw.c,v 1.81 1998/04/15 17:46:51 bde Exp $
* $Id: ip_fw.c,v 1.82 1998/04/21 18:54:53 julian Exp $
*/
/*
@ -36,6 +36,7 @@
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

View file

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
* $Id: ip_input.c,v 1.81 1998/03/30 09:52:56 phk Exp $
* $Id: ip_input.c,v 1.82 1998/04/13 17:27:08 phk Exp $
* $ANA: ip_input.c,v 1.5 1996/09/18 14:34:59 wollman Exp $
*/
@ -80,7 +80,7 @@ int rsvp_on = 0;
static int ip_rsvp_on;
struct socket *ip_rsvpd;
static int ipforwarding = 0;
int ipforwarding = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
&ipforwarding, 0, "");
@ -878,6 +878,7 @@ ip_slowtimo()
}
}
}
ipflow_slowtimo();
splx(s);
}
@ -1381,8 +1382,10 @@ ip_forward(m, srcrt)
if (type)
ipstat.ips_redirectsent++;
else {
if (mcopy)
if (mcopy) {
ipflow_create(&ipforward_rt, mcopy);
m_freem(mcopy);
}
return;
}
}

View file

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ip_var.h 8.2 (Berkeley) 1/9/95
* $Id: ip_var.h,v 1.33 1997/05/25 06:09:23 peter Exp $
* $Id: ip_var.h,v 1.34 1997/09/07 05:26:46 bde Exp $
*/
#ifndef _NETINET_IP_VAR_H_
@ -132,6 +132,7 @@ struct ipstat {
u_long ips_fragdropped; /* frags dropped (dups, out of space) */
u_long ips_fragtimeout; /* fragments timed out */
u_long ips_forward; /* packets forwarded */
u_long ips_fastforward; /* packets fast forwarded */
u_long ips_cantforward; /* packets rcvd for unreachable dest */
u_long ips_redirectsent; /* packets forwarded on same net */
u_long ips_noproto; /* unknown or unsupported protocol */
@ -150,6 +151,22 @@ struct ipstat {
u_long ips_notmember; /* multicasts for unregistered grps */
};
#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */
/*
 * One entry of the fast-forwarding flow cache.  An entry is identified
 * by its (ipf_dst, ipf_src, ipf_tos) triple and carries the route that
 * matching packets are sent over, together with usage counters that are
 * periodically folded into the global statistics.
 */
struct ipflow {
LIST_ENTRY(ipflow) ipf_next; /* next ipflow in bucket */
struct in_addr ipf_dst; /* destination address */
struct in_addr ipf_src; /* source address */
u_int8_t ipf_tos; /* type-of-service */
struct route ipf_ro; /* associated route entry */
u_long ipf_uses; /* number of uses in this period */
int ipf_timer; /* remaining lifetime of this entry, in slow-timeout calls */
u_long ipf_dropped; /* ENOBUFS returned by if_output */
u_long ipf_errors; /* other errors returned by if_output */
u_long ipf_last_uses; /* number of uses in last period */
};
#ifdef KERNEL
/* flags passed to ip_output as last parameter */
#define IP_FORWARDING 0x1 /* most of ip header exists */
@ -163,6 +180,7 @@ struct route;
extern struct ipstat ipstat;
extern u_short ip_id; /* ip packet ctr, for ids */
extern int ip_defttl; /* default IP ttl */
extern int ipforwarding; /* ip forwarding */
extern u_char ip_protox[];
extern struct socket *ip_rsvpd; /* reservation protocol daemon */
extern struct socket *ip_mrouter; /* multicast routing daemon */