mirror of
https://github.com/freebsd/freebsd-src
synced 2024-07-21 18:27:22 +00:00
ip6_output: Reduce cache misses on pktopts
When profiling an IP6 heavy workload, I noticed that we were getting a lot of cache misses in ip6_output() around ip6_pktopts. This was happening because the TCP stack passes inp->in6p_outputopts even if all options are unused. So in the common case of no options present, pkt_opts is not null, and is checked repeatedly for different options. Since ip6_pktopts is large (4 cachelines), and every field is checked, we take 4 cache misses (2 of which tend to be hidden by the adjacent line prefetcher). To fix this common case, I introduced a new flag in ip6_pktopts (ip6po_valid) which tracks which options have been set. In the common case where nothing is set, this causes just a single cache miss to load. It also eliminates a test for some options (if (opt != NULL && opt->val >= const) vs if ((optvalid & flag) !=0 ) To keep the struct the same size in 64-bit kernels, and to keep the integer values (like ip6po_hlim, ip6po_tclass, etc) on the same cacheline, I moved them to the top. As suggested by zlei, the null check in MAKE_EXTHDR() becomes redundant, and can be removed. For our web server workload (with the ip6po_tclass option set), this drops the CPI from 2.9 to 2.4 for ip6_output Differential Revision: https://reviews.freebsd.org/D44204 Reviewed by: bz, glebius, zlei No Objection from: melifaro Sponsored by: Netflix Inc.
This commit is contained in:
parent
3a9ddfa1ab
commit
530c2c30b0
|
@ -159,14 +159,12 @@ static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
|
|||
*/
|
||||
#define MAKE_EXTHDR(hp, mp, _ol) \
|
||||
do { \
|
||||
if (hp) { \
|
||||
struct ip6_ext *eh = (struct ip6_ext *)(hp); \
|
||||
error = ip6_copyexthdr((mp), (caddr_t)(hp), \
|
||||
((eh)->ip6e_len + 1) << 3); \
|
||||
if (error) \
|
||||
goto freehdrs; \
|
||||
(_ol) += (*(mp))->m_len; \
|
||||
} \
|
||||
struct ip6_ext *eh = (struct ip6_ext *)(hp); \
|
||||
error = ip6_copyexthdr((mp), (caddr_t)(hp), \
|
||||
((eh)->ip6e_len + 1) << 3); \
|
||||
if (error) \
|
||||
goto freehdrs; \
|
||||
(_ol) += (*(mp))->m_len; \
|
||||
} while (/*CONSTCOND*/ 0)
|
||||
|
||||
/*
|
||||
|
@ -431,6 +429,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
uint32_t fibnum;
|
||||
struct m_tag *fwd_tag = NULL;
|
||||
uint32_t id;
|
||||
uint32_t optvalid;
|
||||
|
||||
NET_EPOCH_ASSERT();
|
||||
|
||||
|
@ -491,14 +490,17 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
* Keep the length of the unfragmentable part for fragmentation.
|
||||
*/
|
||||
bzero(&exthdrs, sizeof(exthdrs));
|
||||
optlen = 0;
|
||||
optlen = optvalid = 0;
|
||||
unfragpartlen = sizeof(struct ip6_hdr);
|
||||
if (opt) {
|
||||
optvalid = opt->ip6po_valid;
|
||||
|
||||
/* Hop-by-Hop options header. */
|
||||
MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh, optlen);
|
||||
if ((optvalid & IP6PO_VALID_HBH) != 0)
|
||||
MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh, optlen);
|
||||
|
||||
/* Destination options header (1st part). */
|
||||
if (opt->ip6po_rthdr) {
|
||||
if ((optvalid & IP6PO_VALID_RHINFO) != 0) {
|
||||
#ifndef RTHDR_SUPPORT_IMPLEMENTED
|
||||
/*
|
||||
* If there is a routing header, discard the packet
|
||||
|
@ -524,11 +526,13 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
* options, which might automatically be inserted in
|
||||
* the kernel.
|
||||
*/
|
||||
MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1,
|
||||
optlen);
|
||||
if ((optvalid & IP6PO_VALID_DEST1) != 0)
|
||||
MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1,
|
||||
optlen);
|
||||
}
|
||||
/* Routing header. */
|
||||
MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr, optlen);
|
||||
if ((optvalid & IP6PO_VALID_RHINFO) != 0)
|
||||
MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr, optlen);
|
||||
|
||||
unfragpartlen += optlen;
|
||||
|
||||
|
@ -538,7 +542,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
*/
|
||||
|
||||
/* Destination options header (2nd part). */
|
||||
MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2, optlen);
|
||||
if ((optvalid & IP6PO_VALID_DEST2) != 0)
|
||||
MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2, optlen);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -627,7 +632,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
|
||||
/* Route packet. */
|
||||
ro_pmtu = ro;
|
||||
if (opt && opt->ip6po_rthdr)
|
||||
if ((optvalid & IP6PO_VALID_RHINFO) != 0)
|
||||
ro = &opt->ip6po_route;
|
||||
if (ro != NULL)
|
||||
dst = (struct sockaddr_in6 *)&ro->ro_dst;
|
||||
|
@ -641,7 +646,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
* Do not override if a non-zero value is already set.
|
||||
* We check the diffserv field and the ECN field separately.
|
||||
*/
|
||||
if (opt && opt->ip6po_tclass >= 0) {
|
||||
if ((optvalid & IP6PO_VALID_TC) != 0){
|
||||
int mask = 0;
|
||||
|
||||
if (IPV6_DSCP(ip6) == 0)
|
||||
|
@ -653,7 +658,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
}
|
||||
|
||||
/* Fill in or override the hop limit field, if necessary. */
|
||||
if (opt && opt->ip6po_hlim != -1)
|
||||
if ((optvalid & IP6PO_VALID_HLIM) != 0)
|
||||
ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
|
||||
else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
|
||||
if (im6o != NULL)
|
||||
|
@ -855,7 +860,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
|
|||
/* All scope ID checks are successful. */
|
||||
|
||||
if (nh && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
|
||||
if (opt && opt->ip6po_nextroute.ro_nh) {
|
||||
if ((optvalid & IP6PO_VALID_NHINFO) != 0) {
|
||||
/*
|
||||
* The nexthop is explicitly specified by the
|
||||
* application. We assume the next hop is an IPv6
|
||||
|
@ -2648,10 +2653,14 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
|
|||
free(pktopt->ip6po_pktinfo, M_IP6OPT);
|
||||
pktopt->ip6po_pktinfo = NULL;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_HOPLIMIT)
|
||||
if (optname == -1 || optname == IPV6_HOPLIMIT) {
|
||||
pktopt->ip6po_hlim = -1;
|
||||
if (optname == -1 || optname == IPV6_TCLASS)
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_HLIM;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_TCLASS) {
|
||||
pktopt->ip6po_tclass = -1;
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_TC;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_NEXTHOP) {
|
||||
if (pktopt->ip6po_nextroute.ro_nh) {
|
||||
NH_FREE(pktopt->ip6po_nextroute.ro_nh);
|
||||
|
@ -2660,16 +2669,19 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
|
|||
if (pktopt->ip6po_nexthop)
|
||||
free(pktopt->ip6po_nexthop, M_IP6OPT);
|
||||
pktopt->ip6po_nexthop = NULL;
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_NHINFO;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_HOPOPTS) {
|
||||
if (pktopt->ip6po_hbh)
|
||||
free(pktopt->ip6po_hbh, M_IP6OPT);
|
||||
pktopt->ip6po_hbh = NULL;
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_HBH;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
|
||||
if (pktopt->ip6po_dest1)
|
||||
free(pktopt->ip6po_dest1, M_IP6OPT);
|
||||
pktopt->ip6po_dest1 = NULL;
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_DEST1;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_RTHDR) {
|
||||
if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
|
||||
|
@ -2679,11 +2691,13 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
|
|||
NH_FREE(pktopt->ip6po_route.ro_nh);
|
||||
pktopt->ip6po_route.ro_nh = NULL;
|
||||
}
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_RHINFO;
|
||||
}
|
||||
if (optname == -1 || optname == IPV6_DSTOPTS) {
|
||||
if (pktopt->ip6po_dest2)
|
||||
free(pktopt->ip6po_dest2, M_IP6OPT);
|
||||
pktopt->ip6po_dest2 = NULL;
|
||||
pktopt->ip6po_valid &= ~IP6PO_VALID_DEST2;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2730,6 +2744,7 @@ copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
|
|||
PKTOPT_EXTHDRCPY(ip6po_dest1);
|
||||
PKTOPT_EXTHDRCPY(ip6po_dest2);
|
||||
PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
|
||||
dst->ip6po_valid = src->ip6po_valid;
|
||||
return (0);
|
||||
|
||||
bad:
|
||||
|
@ -2959,6 +2974,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
return (ENOBUFS);
|
||||
}
|
||||
bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
|
||||
opt->ip6po_valid |= IP6PO_VALID_PKTINFO;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2981,6 +2997,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
return (EINVAL);
|
||||
|
||||
opt->ip6po_hlim = *hlimp;
|
||||
opt->ip6po_valid |= IP6PO_VALID_HLIM;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2995,6 +3012,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
return (EINVAL);
|
||||
|
||||
opt->ip6po_tclass = tclass;
|
||||
opt->ip6po_valid |= IP6PO_VALID_TC;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3045,6 +3063,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
if (opt->ip6po_nexthop == NULL)
|
||||
return (ENOBUFS);
|
||||
bcopy(buf, opt->ip6po_nexthop, *buf);
|
||||
opt->ip6po_valid |= IP6PO_VALID_NHINFO;
|
||||
break;
|
||||
|
||||
case IPV6_2292HOPOPTS:
|
||||
|
@ -3083,6 +3102,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
if (opt->ip6po_hbh == NULL)
|
||||
return (ENOBUFS);
|
||||
bcopy(hbh, opt->ip6po_hbh, hbhlen);
|
||||
opt->ip6po_valid |= IP6PO_VALID_HBH;
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -3150,6 +3170,10 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
if (*newdest == NULL)
|
||||
return (ENOBUFS);
|
||||
bcopy(dest, *newdest, destlen);
|
||||
if (newdest == &opt->ip6po_dest1)
|
||||
opt->ip6po_valid |= IP6PO_VALID_DEST1;
|
||||
else
|
||||
opt->ip6po_valid |= IP6PO_VALID_DEST2;
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -3192,6 +3216,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
|
|||
if (opt->ip6po_rthdr == NULL)
|
||||
return (ENOBUFS);
|
||||
bcopy(rth, opt->ip6po_rthdr, rthlen);
|
||||
opt->ip6po_valid |= IP6PO_VALID_RHINFO;
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -130,27 +130,26 @@ struct ip6po_nhinfo {
|
|||
#define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop
|
||||
#define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route
|
||||
|
||||
/*
|
||||
* Note that fields with valid data must be flagged in ip6po_valid.
|
||||
* This is done to reduce cache misses in ip6_output(). Before
|
||||
* ip6po_valid, ip6_output needed to check all the individual fields
|
||||
* of ip6_pktopts needed to be checked themselves, and they are spread
|
||||
* across 4 cachelines. ip6_output() is currently the only consumer of
|
||||
* these flags, as it is in the critical path of every packet sent.
|
||||
*/
|
||||
struct ip6_pktopts {
|
||||
struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */
|
||||
uint32_t ip6po_valid;
|
||||
#define IP6PO_VALID_HLIM 0x0001
|
||||
#define IP6PO_VALID_PKTINFO 0x0002
|
||||
#define IP6PO_VALID_NHINFO 0x0004
|
||||
#define IP6PO_VALID_HBH 0x0008
|
||||
#define IP6PO_VALID_DEST1 0x0010
|
||||
#define IP6PO_VALID_RHINFO 0x0020
|
||||
#define IP6PO_VALID_DEST2 0x0040
|
||||
#define IP6PO_VALID_TC 0x0080
|
||||
|
||||
int ip6po_hlim; /* Hoplimit for outgoing packets */
|
||||
|
||||
/* Outgoing IF/address information */
|
||||
struct in6_pktinfo *ip6po_pktinfo;
|
||||
|
||||
/* Next-hop address information */
|
||||
struct ip6po_nhinfo ip6po_nhinfo;
|
||||
|
||||
struct ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */
|
||||
|
||||
/* Destination options header (before a routing header) */
|
||||
struct ip6_dest *ip6po_dest1;
|
||||
|
||||
/* Routing header related info. */
|
||||
struct ip6po_rhinfo ip6po_rhinfo;
|
||||
|
||||
/* Destination options header (after a routing header) */
|
||||
struct ip6_dest *ip6po_dest2;
|
||||
|
||||
int ip6po_tclass; /* traffic class */
|
||||
|
||||
int ip6po_minmtu; /* fragment vs PMTU discovery policy */
|
||||
|
@ -171,6 +170,25 @@ struct ip6_pktopts {
|
|||
#endif
|
||||
#define IP6PO_DONTFRAG 0x04 /* disable fragmentation (IPV6_DONTFRAG) */
|
||||
#define IP6PO_USECOA 0x08 /* use care of address */
|
||||
|
||||
struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */
|
||||
|
||||
/* Outgoing IF/address information */
|
||||
struct in6_pktinfo *ip6po_pktinfo;
|
||||
|
||||
/* Next-hop address information */
|
||||
struct ip6po_nhinfo ip6po_nhinfo;
|
||||
|
||||
struct ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */
|
||||
|
||||
/* Destination options header (before a routing header) */
|
||||
struct ip6_dest *ip6po_dest1;
|
||||
|
||||
/* Routing header related info. */
|
||||
struct ip6po_rhinfo ip6po_rhinfo;
|
||||
|
||||
/* Destination options header (after a routing header) */
|
||||
struct ip6_dest *ip6po_dest2;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue