From 1f91d8c5634363237e3b1b457dcb8e0f39918d5d Mon Sep 17 00:00:00 2001 From: David Greenman Date: Tue, 19 May 1998 14:04:36 +0000 Subject: [PATCH] Added fast IP forwarding code by Matt Thomas via NetBSD, ported to FreeBSD by Pierre Beyssac and minorly tweaked by me. This is a standard part of FreeBSD, but must be enabled with: "sysctl -w net.inet.ip.fastforwarding=1" ...and of course forwarding must also be enabled. This should probably be modified to use the zone allocator for speed and space efficiency. The current algorithm also appears to lose if the number of active paths exceeds IPFLOW_MAX (256), in which case it wastes lots of time trying to figure out which cache entry to drop. --- sys/conf/files | 1 + sys/net/if_ethersubr.c | 4 +- sys/net/if_fddisubr.c | 4 +- sys/net/if_ppp.c | 6 +- sys/netinet/in.h | 6 +- sys/netinet/in_var.h | 6 +- sys/netinet/ip_flow.c | 332 +++++++++++++++++++++++++++++++++++++++++ sys/netinet/ip_fw.c | 3 +- sys/netinet/ip_input.c | 9 +- sys/netinet/ip_var.h | 20 ++- 10 files changed, 380 insertions(+), 11 deletions(-) create mode 100644 sys/netinet/ip_flow.c diff --git a/sys/conf/files b/sys/conf/files index 1e79f04751f2..c73d92866a0e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -268,6 +268,7 @@ netinet/in_rmx.c optional inet netinet/ip_auth.c optional ipfilter inet netinet/ip_divert.c optional ipdivert netinet/ip_fil.c optional ipfilter inet +netinet/ip_flow.c optional inet netinet/ip_frag.c optional ipfilter inet netinet/ip_fw.c optional ipfirewall netinet/ip_icmp.c optional inet diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 3266f18db84d..d5fba1b61a3e 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 - * $Id: if_ethersubr.c,v 1.46 1998/03/18 01:40:11 wollman Exp $ + * $Id: if_ethersubr.c,v 1.47 1998/03/30 09:51:39 phk Exp $ */ #include "opt_atalk.h" @@ -501,6 +501,8 @@ ether_input(ifp, eh, m) switch (ether_type) { #ifdef INET case ETHERTYPE_IP: + if (ipflow_fastforward(m)) /* nonzero: mbuf already handed to the output interface */ + return; schednetisr(NETISR_IP); inq = &ipintrq; break; diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c index 5f5487894fe0..140ef0dc0592 100644 --- a/sys/net/if_fddisubr.c +++ b/sys/net/if_fddisubr.c @@ -33,7 +33,7 @@ * SUCH DAMAGE. * * from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp - * $Id: if_fddisubr.c,v 1.26 1998/02/20 13:11:49 bde Exp $ + * $Id: if_fddisubr.c,v 1.27 1998/03/30 09:51:44 phk Exp $ */ #include "opt_atalk.h" @@ -533,6 +533,8 @@ fddi_input(ifp, fh, m) switch (type) { #ifdef INET case ETHERTYPE_IP: + if (ipflow_fastforward(m)) /* nonzero: mbuf already handed to the output interface */ + return; schednetisr(NETISR_IP); inq = &ipintrq; break; diff --git a/sys/net/if_ppp.c b/sys/net/if_ppp.c index 8e652451d2dd..bf43b646be55 100644 --- a/sys/net/if_ppp.c +++ b/sys/net/if_ppp.c @@ -69,7 +69,7 @@ * Paul Mackerras (paulus@cs.anu.edu.au). */ -/* $Id: if_ppp.c,v 1.55 1998/03/30 09:51:52 phk Exp $ */ +/* $Id: if_ppp.c,v 1.56 1998/04/06 11:43:10 phk Exp $ */ /* from if_sl.c,v 1.11 84/10/04 12:54:47 rick Exp */ /* from NetBSD: if_ppp.c,v 1.15.2.2 1994/07/28 05:17:58 cgd Exp */ @@ -1488,6 +1488,10 @@ ppp_inproc(sc, m) m->m_pkthdr.len -= PPP_HDRLEN; m->m_data += PPP_HDRLEN; m->m_len -= PPP_HDRLEN; + if (ipflow_fastforward(m)) { /* nonzero: mbuf already handed to the output interface */ + sc->sc_last_recv = time_second; + return; + } schednetisr(NETISR_IP); inq = &ipintrq; sc->sc_last_recv = time_second; /* update time of last pkt rcvd */ diff --git a/sys/netinet/in.h b/sys/netinet/in.h index d36bb7406244..e5c68afaaf32 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -31,7 +31,7 @@ * SUCH DAMAGE.
* * @(#)in.h 8.3 (Berkeley) 1/3/94 - * $Id: in.h,v 1.31 1998/04/19 17:22:27 phk Exp $ + * $Id: in.h,v 1.32 1998/05/10 20:51:46 jb Exp $ */ #ifndef _NETINET_IN_H_ @@ -398,7 +398,8 @@ struct ip_mreq { #define IPCTL_INTRQDROPS 11 /* number of netisr q drops */ #define IPCTL_STATS 12 /* ipstat structure */ #define IPCTL_ACCEPTSOURCEROUTE 13 /* may accept source routed packets */ -#define IPCTL_MAXID 14 +#define IPCTL_FASTFORWARDING 14 /* use fast IP forwarding code */ +#define IPCTL_MAXID 15 #define IPCTL_NAMES { \ { 0, 0 }, \ @@ -415,6 +416,7 @@ struct ip_mreq { { "intr-queue-drops", CTLTYPE_INT }, \ { "stats", CTLTYPE_STRUCT }, \ { "accept_sourceroute", CTLTYPE_INT }, \ + { "fastforwarding", CTLTYPE_INT }, \ } diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 83f5baa906ba..92549046163b 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in_var.h 8.2 (Berkeley) 1/9/95 - * $Id: in_var.h,v 1.26 1997/04/27 20:01:06 wollman Exp $ + * $Id: in_var.h,v 1.27 1997/09/07 05:26:43 bde Exp $ */ #ifndef _NETINET_IN_VAR_H_ @@ -211,6 +211,7 @@ do { \ IN_NEXT_MULTI((step), (inm)); \ } while(0) +struct route; struct in_multi *in_addmulti __P((struct in_addr *, struct ifnet *)); void in_delmulti __P((struct in_multi *)); int in_control __P((struct socket *, int, caddr_t, struct ifnet *, @@ -219,6 +220,9 @@ void in_rtqdrain __P((void)); void ip_input __P((struct mbuf *)); int in_ifadown __P((struct ifaddr *ifa)); void in_ifscrub __P((struct ifnet *, struct in_ifaddr *)); +int ipflow_fastforward __P((struct mbuf *)); /* 1 if the mbuf was passed to if_output, else 0 */ +void ipflow_create __P((const struct route *, struct mbuf *)); /* cache the route used to forward a packet */ +void ipflow_slowtimo __P((void)); #endif /* KERNEL */ diff --git a/sys/netinet/ip_flow.c b/sys/netinet/ip_flow.c new file mode 100644 index 000000000000..c8ab67fcedad --- /dev/null +++ b/sys/netinet/ip_flow.c @@ -0,0 +1,332 @@ +/*- + * Copyright (c) 1998 The NetBSD Foundation, Inc. + * All rights reserved.
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define IPFLOW_TIMER (5 * PR_SLOWHZ) +#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) +static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; +static int ipflow_inuse; +#define IPFLOW_MAX 256 + +static int ipflow_active = 0; +SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, + &ipflow_active, 0, ""); + +MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); +/* Fold dst, src and tos into a bucket index in [0, IPFLOW_HASHSIZE). */ +static unsigned +ipflow_hash( + struct in_addr dst, + struct in_addr src, + unsigned tos) +{ + unsigned hash = tos; + int idx; + for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) /* & 31: at idx == 0 the count would be 32, undefined for a 32-bit s_addr */ + hash += (dst.s_addr >> ((32 - idx) & 31)) + (src.s_addr >> idx); + return hash & (IPFLOW_HASHSIZE-1); +} +/* Return the cached flow whose dst, src and tos match ip, or NULL if there is none. */ +static struct ipflow * +ipflow_lookup( + const struct ip *ip) +{ + unsigned hash; + struct ipflow *ipf; + + hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); + + ipf = LIST_FIRST(&ipflows[hash]); + while (ipf != NULL) { + if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr + && ip->ip_src.s_addr == ipf->ipf_src.s_addr + && ip->ip_tos == ipf->ipf_tos) + break; + ipf = LIST_NEXT(ipf, ipf_next); + } + return ipf; +} +/* Try to forward m from the flow cache: returns 1 once m has been passed to if_output, 0 if the caller must handle m. */ +int +ipflow_fastforward( + struct mbuf *m) +{ + struct ip *ip; + struct ipflow *ipf; + struct rtentry *rt; + u_int32_t sum; + int error; + + /* + * Are we forwarding packets? Big enough for an IP packet? + */ + if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) + return 0; + /* + * IP header with no option and valid version and length + */ + ip = mtod(m, struct ip *); + if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) + || ntohs(ip->ip_len) > m->m_pkthdr.len) + return 0; + /* + * Find a flow. + */ + if ((ipf = ipflow_lookup(ip)) == NULL) + return 0; + + /* + * Route and interface still up?
+ */ + rt = ipf->ipf_ro.ro_rt; + if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) + return 0; + + /* + * Packet size OK? TTL? + */ + if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) + return 0; + + /* + * Everything checks out and so we can forward this packet. + * Modify the TTL and incrementally change the checksum. + */ + ip->ip_ttl -= IPTTLDEC; + if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) { + ip->ip_sum += htons(IPTTLDEC << 8) + 1; + } else { + ip->ip_sum += htons(IPTTLDEC << 8); + } + + /* + * Send the packet on its way. All we can get back is ENOBUFS + */ + ipf->ipf_uses++; + ipf->ipf_timer = IPFLOW_TIMER; + if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, &ipf->ipf_ro.ro_dst, rt)) != 0) { + if (error == ENOBUFS) + ipf->ipf_dropped++; + else + ipf->ipf_errors++; + } + return 1; +} +/* Add ipf's use, error and drop counters to its route's rt_use and to ipstat. */ +static void +ipflow_addstats( + struct ipflow *ipf) +{ + ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; + ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; + ipstat.ips_forward += ipf->ipf_uses; + ipstat.ips_fastforward += ipf->ipf_uses; +} +/* Unlink ipf from its bucket, account its stats, release its route and free it. */ +static void +ipflow_free( + struct ipflow *ipf) +{ + int s; + /* + * Remove the flow from the hash table (at elevated IPL). + * Once it's off the list, we can deal with it at normal + * network IPL. + */ + s = splimp(); + LIST_REMOVE(ipf, ipf_next); + splx(s); + ipflow_addstats(ipf); + RTFREE(ipf->ipf_ro.ro_rt); + ipflow_inuse--; + FREE(ipf, M_IPFLOW); +} +/* Evict one flow (one with a dead route, else the stalest) and return it unlinked but not freed, for reuse. */ +static struct ipflow * +ipflow_reap( + void) +{ + struct ipflow *ipf, *maybe_ipf = NULL; + int idx; + int s; + + for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { + ipf = LIST_FIRST(&ipflows[idx]); + while (ipf != NULL) { + /* + * If this no longer points to a valid route + * reclaim it. + */ + if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) + goto done; + /* + * choose the one that's been least recently used + * or has had the least uses in the last 1.5 + * intervals.
+ */ + if (maybe_ipf == NULL /* nothing chosen yet: take the first candidate */ + || ipf->ipf_timer < maybe_ipf->ipf_timer + || (ipf->ipf_timer == maybe_ipf->ipf_timer + && ipf->ipf_last_uses + ipf->ipf_uses < + maybe_ipf->ipf_last_uses + + maybe_ipf->ipf_uses)) + maybe_ipf = ipf; + ipf = LIST_NEXT(ipf, ipf_next); + } + } + ipf = maybe_ipf; + done: + /* + * Remove the entry from the flow table. + */ + s = splimp(); + LIST_REMOVE(ipf, ipf_next); + splx(s); + ipflow_addstats(ipf); + RTFREE(ipf->ipf_ro.ro_rt); + return ipf; +} +/* Age every cached flow by one tick: free those whose timer reaches 0, fold the use counts of the rest into the stats. */ +void +ipflow_slowtimo( + void) +{ + struct ipflow *ipf; + int idx; + + for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { + ipf = LIST_FIRST(&ipflows[idx]); + while (ipf != NULL) { + struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); + if (--ipf->ipf_timer == 0) { + ipflow_free(ipf); + } else { + ipf->ipf_last_uses = ipf->ipf_uses; + ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; + ipstat.ips_forward += ipf->ipf_uses; + ipstat.ips_fastforward += ipf->ipf_uses; + ipf->ipf_uses = 0; + } + ipf = next_ipf; + } + } +} +/* Create or refresh the flow cache entry for packet m, which is being forwarded via route ro. */ +void +ipflow_create( + const struct route *ro, + struct mbuf *m) +{ + const struct ip *const ip = mtod(m, struct ip *); + struct ipflow *ipf; + unsigned hash; + int s; + + /* + * Don't create cache entries for ICMP messages. + */ + if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) + return; + /* + * See if an existing flow struct exists. If so remove it from its + * list and free the old route. If not, try to malloc a new one + * (if we aren't at our limit). + */ + ipf = ipflow_lookup(ip); + if (ipf == NULL) { + if (ipflow_inuse == IPFLOW_MAX) { + ipf = ipflow_reap(); + } else { + ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW, + M_NOWAIT); + if (ipf == NULL) + return; + ipflow_inuse++; + } + bzero((caddr_t) ipf, sizeof(*ipf)); + } else { + s = splimp(); + LIST_REMOVE(ipf, ipf_next); + splx(s); + ipflow_addstats(ipf); + RTFREE(ipf->ipf_ro.ro_rt); + ipf->ipf_uses = ipf->ipf_last_uses = 0; + ipf->ipf_errors = ipf->ipf_dropped = 0; + } + + /* + * Fill in the updated information.
+ */ + ipf->ipf_ro = *ro; /* the cached copy takes its own reference on the route, dropped later via RTFREE */ + ro->ro_rt->rt_refcnt++; + ipf->ipf_dst = ip->ip_dst; + ipf->ipf_src = ip->ip_src; + ipf->ipf_tos = ip->ip_tos; + ipf->ipf_timer = IPFLOW_TIMER; + /* + * Insert into the appropriate bucket of the flow table. + */ + hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); + s = splimp(); + LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); + splx(s); +} diff --git a/sys/netinet/ip_fw.c b/sys/netinet/ip_fw.c index 6d2614a9c8b0..bcdfda0afb57 100644 --- a/sys/netinet/ip_fw.c +++ b/sys/netinet/ip_fw.c @@ -12,7 +12,7 @@ * * This software is provided ``AS IS'' without any warranties of any kind. * - * $Id: ip_fw.c,v 1.81 1998/04/15 17:46:51 bde Exp $ + * $Id: ip_fw.c,v 1.82 1998/04/21 18:54:53 julian Exp $ */ /* @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 4e07424e9759..5f0eced490a3 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 - * $Id: ip_input.c,v 1.81 1998/03/30 09:52:56 phk Exp $ + * $Id: ip_input.c,v 1.82 1998/04/13 17:27:08 phk Exp $ * $ANA: ip_input.c,v 1.5 1996/09/18 14:34:59 wollman Exp $ */ @@ -80,7 +80,7 @@ int rsvp_on = 0; static int ip_rsvp_on; struct socket *ip_rsvpd; -static int ipforwarding = 0; +int ipforwarding = 0; SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, &ipforwarding, 0, ""); @@ -878,6 +878,7 @@ ip_slowtimo() } } } + ipflow_slowtimo(); splx(s); } @@ -1381,8 +1382,10 @@ ip_forward(m, srcrt) if (type) ipstat.ips_redirectsent++; else { - if (mcopy) + if (mcopy) { + ipflow_create(&ipforward_rt, mcopy); m_freem(mcopy); + } return; } } diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index f80a353d41f0..5bd7257e0ca2 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE.
* * @(#)ip_var.h 8.2 (Berkeley) 1/9/95 - * $Id: ip_var.h,v 1.33 1997/05/25 06:09:23 peter Exp $ + * $Id: ip_var.h,v 1.34 1997/09/07 05:26:46 bde Exp $ */ #ifndef _NETINET_IP_VAR_H_ @@ -132,6 +132,7 @@ struct ipstat { u_long ips_fragdropped; /* frags dropped (dups, out of space) */ u_long ips_fragtimeout; /* fragments timed out */ u_long ips_forward; /* packets forwarded */ + u_long ips_fastforward; /* packets fast forwarded */ u_long ips_cantforward; /* packets rcvd for unreachable dest */ u_long ips_redirectsent; /* packets forwarded on same net */ u_long ips_noproto; /* unknown or unsupported protocol */ @@ -150,6 +151,22 @@ struct ipstat { u_long ips_notmember; /* multicasts for unregistered grps */ }; +#define IPFLOW_HASHBITS 6 /* log2 of the flow hash table size; should not be a multiple of 8 */ +struct ipflow { /* one cached forwarding decision, keyed by dst, src and tos */ + LIST_ENTRY(ipflow) ipf_next; /* next ipflow in bucket */ + struct in_addr ipf_dst; /* destination address */ + struct in_addr ipf_src; /* source address */ + + u_int8_t ipf_tos; /* type-of-service */ + struct route ipf_ro; /* associated route entry */ + u_long ipf_uses; /* number of uses in this period */ + + int ipf_timer; /* remaining lifetime of this entry, in ipflow_slowtimo() calls */ + u_long ipf_dropped; /* ENOBUFS returned by if_output */ + u_long ipf_errors; /* other errors returned by if_output */ + u_long ipf_last_uses; /* number of uses in last period */ +}; + #ifdef KERNEL /* flags passed to ip_output as last parameter */ #define IP_FORWARDING 0x1 /* most of ip header exists */ @@ -163,6 +180,7 @@ struct route; extern struct ipstat ipstat; extern u_short ip_id; /* ip packet ctr, for ids */ extern int ip_defttl; /* default IP ttl */ +extern int ipforwarding; /* ip forwarding enabled; also checked by ipflow_fastforward() */ extern u_char ip_protox[]; extern struct socket *ip_rsvpd; /* reservation protocol daemon */ extern struct socket *ip_mrouter; /* multicast routing daemon */