diff --git a/sys/dev/oce/oce_if.c b/sys/dev/oce/oce_if.c index af57491e1fa2..68985b838bf5 100644 --- a/sys/dev/oce/oce_if.c +++ b/sys/dev/oce/oce_if.c @@ -1731,7 +1731,10 @@ oce_attach_ifp(POCE_SOFTC sc) sc->ifp->if_baudrate = IF_Gbps(10); #if __FreeBSD_version >= 1000000 - sc->ifp->if_hw_tsomax = OCE_MAX_TSO_SIZE; + sc->ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */, + OCE_MAX_TX_ELEMENTS /* maximum frag count */, + 12 /* 4K frag size */); #endif ether_ifattach(sc->ifp, sc->macaddr.mac_addr); diff --git a/sys/dev/oce/oce_if.h b/sys/dev/oce/oce_if.h index b6db402e7766..bb788413faa0 100644 --- a/sys/dev/oce/oce_if.h +++ b/sys/dev/oce/oce_if.h @@ -152,7 +152,6 @@ extern int mp_ncpus; /* system's total active cpu cores */ #define OCE_MAX_TX_ELEMENTS 29 #define OCE_MAX_TX_DESC 1024 #define OCE_MAX_TX_SIZE 65535 -#define OCE_MAX_TSO_SIZE (65535 - ETHER_HDR_LEN) #define OCE_MAX_RX_SIZE 4096 #define OCE_MAX_RQ_POSTS 255 #define OCE_DEFAULT_PROMISCUOUS 0 diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index f3fde92e3f7a..dd0eb6c7fc8d 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -1722,7 +1722,11 @@ vmxnet3_setup_interface(struct vmxnet3_softc *sc) ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vmxnet3_init; ifp->if_ioctl = vmxnet3_ioctl; - ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE; + + ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - sizeof(struct ether_vlan_header) /* bytes */, + VMXNET3_TX_MAXSEGS /* maximum frag count */, + VMXNET3_TX_MAXSEGSHIFT /* frag size */); #ifdef VMXNET3_LEGACY_TX ifp->if_start = vmxnet3_start; diff --git a/sys/dev/vmware/vmxnet3/if_vmxvar.h b/sys/dev/vmware/vmxnet3/if_vmxvar.h index 6c797214b2ee..a6129f911fbe 100644 --- a/sys/dev/vmware/vmxnet3/if_vmxvar.h +++ b/sys/dev/vmware/vmxnet3/if_vmxvar.h @@ -277,14 +277,13 @@ struct vmxnet3_softc { */ #define VMXNET3_TX_MAXSEGS 32 #define 
VMXNET3_TX_MAXSIZE (VMXNET3_TX_MAXSEGS * MCLBYTES) -#define VMXNET3_TSO_MAXSIZE \ - (VMXNET3_TX_MAXSIZE - sizeof(struct ether_vlan_header)) /* * Maximum support Tx segments size. The length field in the * Tx descriptor is 14 bits. */ -#define VMXNET3_TX_MAXSEGSIZE (1 << 14) +#define VMXNET3_TX_MAXSEGSHIFT 14 +#define VMXNET3_TX_MAXSEGSIZE (1 << VMXNET3_TX_MAXSEGSHIFT) /* * The maximum number of Rx segments we accept. When LRO is enabled, diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index 9ff872cdc745..1fadee46a9c0 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -134,7 +134,6 @@ static const int MODPARM_rx_flip = 0; * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) -#define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 @@ -2102,7 +2101,10 @@ create_netdev(device_t dev) ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; - ifp->if_hw_tsomax = NF_TSO_MAXBURST; + ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */, + MAX_TX_REQ_FRAGS /* maximum frag count */, + PAGE_SHIFT /* PAGE_SIZE frag size */); ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); diff --git a/sys/net/if.c b/sys/net/if.c index 017af336d2b6..5e9b77685449 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -422,6 +422,52 @@ if_grow(void) V_ifindex_table = e; } +/* + * Compute the field-wise minimum of two "if_hw_tsomax" values: + */ +u_int +if_hw_tsomax_common(u_int a, u_int b) +{ + u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a); + u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a); + u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a); + u_int b_bytes = IF_HW_TSOMAX_GET_BYTES(b); + u_int b_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(b); + u_int b_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(b); + + return (IF_HW_TSOMAX_BUILD_VALUE(min(a_bytes, b_bytes), + 
min(a_frag_count, b_frag_count), + min(a_frag_size, b_frag_size))); +} + +/* + * Range check "if_hw_tsomax": default zero fields, clamp the frag size: + */ +u_int +if_hw_tsomax_range_check(u_int a) +{ + u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a); + u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a); + u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a); + + /* round down to nearest 4 bytes */ + a_bytes &= 0xFFFC; + + /* use default, if zero */ + if (a_bytes == 0) + a_bytes = IF_HW_TSOMAX_DEFAULT_BYTES; + + /* use default, if zero */ + if (a_frag_count == 0) + a_frag_count = IF_HW_TSOMAX_DEFAULT_FRAG_COUNT; + + /* use default (the maximum shift), if zero or out of range */ + if (a_frag_size == 0 || a_frag_size > IF_HW_TSOMAX_DEFAULT_FRAG_SIZE) + a_frag_size = IF_HW_TSOMAX_DEFAULT_FRAG_SIZE; + + return (IF_HW_TSOMAX_BUILD_VALUE(a_bytes, a_frag_count, a_frag_size)); +} + /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is @@ -445,6 +491,7 @@ if_alloc(u_char type) ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; + ifp->if_hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE(); if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { @@ -657,16 +704,9 @@ if_attach_internal(struct ifnet *ifp, int vmove) TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; - -#if defined(INET) || defined(INET6) - /* Initialize to max value. 
*/ - if (ifp->if_hw_tsomax == 0) - ifp->if_hw_tsomax = min(IP_MAXPACKET, 32 * MCLBYTES - - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); - KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET && - ifp->if_hw_tsomax >= IP_MAXPACKET / 8, - ("%s: tsomax outside of range", __func__)); -#endif + /* range check TSO value */ + ifp->if_hw_tsomax = + if_hw_tsomax_range_check(ifp->if_hw_tsomax); } #ifdef VIMAGE else { diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 8d53526c6a02..e7cfb3c1f753 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -445,11 +445,7 @@ lagg_capabilities(struct lagg_softc *sc) struct lagg_port *lp; int cap = ~0, ena = ~0; u_long hwa = ~0UL; -#if defined(INET) || defined(INET6) - u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */ -#else - u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. */ -#endif + u_int hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE(); LAGG_WLOCK_ASSERT(sc); @@ -458,10 +454,9 @@ lagg_capabilities(struct lagg_softc *sc) cap &= lp->lp_ifp->if_capabilities; ena &= lp->lp_ifp->if_capenable; hwa &= lp->lp_ifp->if_hwassist; - /* Set to the minimum value of the lagg ports. */ - if (lp->lp_ifp->if_hw_tsomax < hw_tsomax && - lp->lp_ifp->if_hw_tsomax > 0) - hw_tsomax = lp->lp_ifp->if_hw_tsomax; + /* Set to the common value of the lagg ports. */ + hw_tsomax = if_hw_tsomax_common(hw_tsomax, + lp->lp_ifp->if_hw_tsomax); } cap = (cap == ~0 ? 0 : cap); ena = (ena == ~0 ? 
0 : ena); diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 09f41b8097e9..af83713c24df 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -119,6 +119,43 @@ typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ifnet_counter); +/* + * Macros defining how to decode the "if_hw_tsomax" field: + */ +#define IF_HW_TSOMAX_GET_BYTES(x) \ + ((uint16_t)(x)) /* 32..65535 */ + +#define IF_HW_TSOMAX_GET_FRAG_COUNT(x) \ + ((uint8_t)((x) >> 16)) /* 1..255 */ + +#define IF_HW_TSOMAX_GET_FRAG_SIZE(x) \ + ((uint8_t)((x) >> 24)) /* 12..16 */ + +/* + * The following macro defines how to build the "if_hw_tsomax" + * field. The "bytes" field has unit 1 byte and declares the maximum + * number of bytes which can be transferred by a single transmit + * offload, TSO, job. The "bytes" field is rounded down to the nearest + * 4 bytes to avoid having the hardware do unaligned memory + * accesses. The "frag_count" field has unit 1 fragment and declares + * the maximum number of fragments a TSO job can contain. The + * "frag_size" field has unit logarithm in base 2 of the actual value + * in bytes and declares the maximum size of a fragment. + */ +#define IF_HW_TSOMAX_BUILD_VALUE(bytes, frag_count, frag_size) \ + ((u_int)((bytes) & 0xFFFC) | ((u_int)((frag_count) & 0xFF) << 16) | \ + ((u_int)((frag_size) & 0xFF) << 24)) + +#define IF_HW_TSOMAX_DEFAULT_BYTES (65536 - 4) +#define IF_HW_TSOMAX_DEFAULT_FRAG_COUNT 255 +#define IF_HW_TSOMAX_DEFAULT_FRAG_SIZE 16 + +#define IF_HW_TSOMAX_DEFAULT_VALUE() \ + IF_HW_TSOMAX_BUILD_VALUE( \ + IF_HW_TSOMAX_DEFAULT_BYTES, \ + IF_HW_TSOMAX_DEFAULT_FRAG_COUNT, \ + IF_HW_TSOMAX_DEFAULT_FRAG_SIZE) + /* * Structure defining a network interface. * @@ -222,8 +259,7 @@ struct ifnet { if_get_counter_t if_get_counter; /* get counter values */ /* Stuff that's only temporary and doesn't belong here. */ - u_int if_hw_tsomax; /* tso burst length limit, the minimum - * is (IP_MAXPACKET / 8). 
+ u_int if_hw_tsomax; /* TSO burst length limits. * XXXAO: Have to find a better place * for it eventually. */ /* @@ -608,6 +644,10 @@ void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); + +/* "if_hw_tsomax" related functions */ +u_int if_hw_tsomax_common(u_int, u_int); +u_int if_hw_tsomax_range_check(u_int); /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index f551ffd618fc..fe2560c6fa7f 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1511,8 +1511,9 @@ vlan_capabilities(struct ifvlan *ifv) * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ - if (p->if_hw_tsomax > 0) - ifp->if_hw_tsomax = p->if_hw_tsomax; + /* start from the defaults so a new parent can raise the limits */ + ifp->if_hw_tsomax = if_hw_tsomax_common(IF_HW_TSOMAX_DEFAULT_VALUE(), + p->if_hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 12d8e754ab5b..5537416e5326 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -767,18 +768,88 @@ tcp_output(struct tcpcb *tp) flags &= ~TH_FIN; if (tso) { + u_int if_hw_tsomax_bytes; + u_int if_hw_tsomax_frag_count; + u_int if_hw_tsomax_frag_size; + struct mbuf *mb; + u_int moff; + int max_len; + + /* extract TSO information */ + if_hw_tsomax_bytes = + IF_HW_TSOMAX_GET_BYTES(tp->t_tsomax); + if_hw_tsomax_frag_count = + IF_HW_TSOMAX_GET_FRAG_COUNT(tp->t_tsomax); + if_hw_tsomax_frag_size = + IF_HW_TSOMAX_GET_FRAG_SIZE(tp->t_tsomax); + + /* compute maximum TSO length */ + max_len = (if_hw_tsomax_bytes - hdrlen); + + /* clamp maximum length 
value */ + if (max_len > IP_MAXPACKET) + max_len = IP_MAXPACKET; + else if (max_len < 0) + max_len = 0; + + /* get smallest length */ + if (len > (u_int)max_len) { + if (max_len != 0) + sendalot = 1; + len = (u_int)max_len; + } + KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); + max_len = 0; + mb = sbsndptr(&so->so_snd, off, len, &moff); + + /* now make sure the number of fragments fit too */ + while (mb != NULL && (u_int)max_len < len) { + u_int cur_length; + u_int cur_frags; + + /* + * Get length of mbuf fragment and how + * many hardware frags, rounded up, it + * would use: + */ + cur_length = (mb->m_len - moff); + cur_frags = (cur_length + + (1 << if_hw_tsomax_frag_size) - 1) >> + if_hw_tsomax_frag_size; + + /* Handle special case: Zero Length Mbuf */ + if (cur_frags == 0) + cur_frags = 1; + + /* + * Check if the fragment limit will be + * reached or exceeded: + */ + if (cur_frags >= if_hw_tsomax_frag_count) { + max_len += min(cur_length, + if_hw_tsomax_frag_count << + if_hw_tsomax_frag_size); + break; + } + max_len += cur_length; + if_hw_tsomax_frag_count -= cur_frags; + moff = 0; + mb = mb->m_next; + } + /* * Limit a burst to t_tsomax minus IP, * TCP and options length to keep ip->ip_len * from overflowing or exceeding the maximum * length allowed by the network interface. 
*/ - if (len > tp->t_tsomax - hdrlen) { - len = tp->t_tsomax - hdrlen; - sendalot = 1; + if (len > (u_int)max_len) { + if (max_len != 0) + sendalot = 1; + len = (u_int)max_len; } /* diff --git a/sys/ofed/drivers/net/mlx4/en_netdev.c b/sys/ofed/drivers/net/mlx4/en_netdev.c index d8b015b1407d..a8eb5c83fed1 100644 --- a/sys/ofed/drivers/net/mlx4/en_netdev.c +++ b/sys/ofed/drivers/net/mlx4/en_netdev.c @@ -673,6 +673,12 @@ int mlx4_en_do_start_port(struct net_device *dev) else priv->rx_csum = 0; + /* set TSO limits so that we don't have to drop TX packets */ + dev->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - sizeof(struct ether_vlan_header) /* bytes */, + 16 /* maximum frag count */, + 16 /* 64K frag size, can do up to 4GByte */); + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL info, unable to modify\n");