From 64ae02c36579bad7d5e682589a0bc1023e359f9d Mon Sep 17 00:00:00 2001
From: Luigi Rizzo
Date: Mon, 27 Feb 2012 19:05:01 +0000
Subject: [PATCH] A bunch of netmap fixes:

USERSPACE:
1. add support for devices with different numbers of rx and tx queues;
2. add better support for zero-copy operation, adding an extra field
   to the netmap ring to indicate how many buffers we have already
   processed but not yet released (with help from Eddie Kohler);
3. the two changes above unfortunately require an API change, so while
   at it add a version field and some spares to the ioctl() argument
   to help detect mismatches;
4. update the manual page for the two changes above;
5. update sample applications in tools/tools/netmap

KERNEL:
1. simplify the internal structures, moving the global wait queues
   to the 'struct netmap_adapter';
2. simplify the functions that map kring<->nic ring indexes;
3. normalize device-specific code, which helps maintenance;
4. start exploring the impact of micro-optimizations (prefetch etc.)
   in the ixgbe driver. Using 'legacy' descriptors on the tx ring and
   prefetching slots gives about a 20% speedup at 900 MHz. Another
   7-10% would come from removing the explicit calls to bus_dmamap*
   in the core (they are effectively NOPs in this case, but it takes
   an expensive load of the per-buffer dma maps to figure out that
   they are all NULL). Rx performance not investigated.

I am postponing the MFC so I can import a few more improvements
before merging.
---
 share/man/man4/netmap.4        |  22 +-
 sys/dev/e1000/if_em.c          |   9 +-
 sys/dev/e1000/if_igb.c         |   9 +-
 sys/dev/e1000/if_lem.c         |   8 +-
 sys/dev/ixgbe/ixgbe.c          |  10 +-
 sys/dev/netmap/if_em_netmap.h  | 159 ++++++-------
 sys/dev/netmap/if_igb_netmap.h | 152 +++++++------
 sys/dev/netmap/if_lem_netmap.h | 178 ++++++++-------
 sys/dev/netmap/if_re_netmap.h  | 139 ++++++------
 sys/dev/netmap/ixgbe_netmap.h  | 157 ++++++-------
 sys/dev/netmap/netmap.c        | 399 +++++++++++++++++++--------------
 sys/dev/netmap/netmap_kern.h   |  77 +++----
 sys/dev/re/if_re.c             |   1 +
 sys/net/netmap.h               | 143 +++++++-----
 sys/net/netmap_user.h          |  18 +-
 tools/tools/netmap/bridge.c    |  22 +-
 tools/tools/netmap/pcap.c      |   4 +-
 tools/tools/netmap/pkt-gen.c   |  11 +-
 18 files changed, 810 insertions(+), 708 deletions(-)

diff --git a/share/man/man4/netmap.4 b/share/man/man4/netmap.4
index 1ad574de027a..dfd96b079099 100644
--- a/share/man/man4/netmap.4
+++ b/share/man/man4/netmap.4
@@ -28,7 +28,7 @@
 .\" $FreeBSD$
 .\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
 .\"
-.Dd November 16, 2011
+.Dd February 27, 2012
 .Dt NETMAP 4
 .Os
 .Sh NAME
@@ -123,8 +123,9 @@ one ring pair (numbered N) for packets from/to the host stack.
 struct netmap_ring {
     const ssize_t   buf_ofs;
     const uint32_t  num_slots;  /* number of slots in the ring. */
-    uint32_t        avail;      /* number of usable slots */
-    uint32_t        cur;        /* 'current' index for the user side */
+    uint32_t        avail;      /* number of usable slots */
+    uint32_t        cur;        /* 'current' index for the user side */
+    uint32_t        reserved;   /* not refilled before current */

     const uint16_t  nr_buf_size;
     uint16_t        flags;
@@ -173,10 +174,14 @@ defined as follows:
 .Bd -literal
 struct nmreq {
     char      nr_name[IFNAMSIZ];
+    uint32_t  nr_version;   /* API version */
+#define NETMAP_API  2       /* current version */
     uint32_t  nr_offset;    /* nifp offset in the shared region */
     uint32_t  nr_memsize;   /* size of the shared region */
-    uint32_t  nr_numdescs;  /* descriptors per queue */
-    uint16_t  nr_numqueues;
+    uint32_t  nr_tx_slots;  /* slots in tx rings */
+    uint32_t  nr_rx_slots;  /* slots in rx rings */
+    uint16_t  nr_tx_rings;  /* number of tx rings */
+    uint16_t  nr_rx_rings;  /* number of rx rings */
     uint16_t  nr_ringid;    /* ring(s) we care about */
 #define NETMAP_HW_RING  0x4000  /* low bits indicate one hw ring */
 #define NETMAP_SW_RING  0x2000  /* we process the sw ring */
@@ -199,8 +204,10 @@ and are:
 returns information about the interface named in nr_name.
 On return, nr_memsize indicates the size of the shared netmap
 memory region (this is device-independent),
-nr_numslots indicates how many buffers are in a ring,
-nr_numrings indicates the number of rings supported by the hardware.
+nr_tx_slots and nr_rx_slots indicate how many buffers are in a
+transmit and receive ring,
+nr_tx_rings and nr_rx_rings indicate the number of transmit
+and receive rings supported by the hardware.
 .Pp
 If the device does not support netmap, the ioctl returns EINVAL.
 .It Dv NIOCREGIF
@@ -266,6 +273,7 @@ struct netmap_request nmr;

 fd = open("/dev/netmap", O_RDWR);
 bzero(&nmr, sizeof(nmr));
 strcpy(nmr.nm_name, "ix0");
+nmr.nm_version = NETMAP_API;
 ioctl(fd, NIOCREG, &nmr);
 p = mmap(0, nmr.memsize, fd);
 nifp = NETMAP_IF(p, nmr.offset);
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 9495c7948475..8ab511d837e0 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -3296,7 +3296,7 @@ em_setup_transmit_ring(struct tx_ring *txr)
 	}
 #ifdef DEV_NETMAP
 	if (slot) {
-		int si = netmap_tidx_n2k(na, txr->me, i);
+		int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
 		uint64_t paddr;
 		void *addr;
@@ -3759,7 +3759,7 @@ em_txeof(struct tx_ring *txr)
 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
 		EM_TX_UNLOCK(txr);
 		EM_CORE_LOCK(adapter);
-		selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+		selwakeuppri(&na->tx_si, PI_NET);
 		EM_CORE_UNLOCK(adapter);
 		EM_TX_LOCK(txr);
 		return (FALSE);
@@ -4051,7 +4051,7 @@ em_setup_receive_ring(struct rx_ring *rxr)
 		rxbuf = &rxr->rx_buffers[j];
 #ifdef DEV_NETMAP
 		if (slot) {
-			int si = netmap_ridx_n2k(na, rxr->me, j);
+			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
 			uint64_t paddr;
 			void *addr;
@@ -4370,10 +4370,11 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)

 	if (ifp->if_capenable & IFCAP_NETMAP) {
 		struct netmap_adapter *na = NA(ifp);
+		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
 		EM_RX_UNLOCK(rxr);
 		EM_CORE_LOCK(adapter);
-		selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+		selwakeuppri(&na->rx_si, PI_NET);
 		EM_CORE_UNLOCK(adapter);
 		return (0);
 	}
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index a70b4adce937..4c842817b757 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -3315,7 +3315,7 @@ igb_setup_transmit_ring(struct tx_ring *txr)
 	}
 #ifdef DEV_NETMAP
 	if (slot) {
-		int si = netmap_tidx_n2k(na,
txr->me, i); + int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); /* no need to set the address */ netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si)); } @@ -3693,7 +3693,7 @@ igb_txeof(struct tx_ring *txr) selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); IGB_TX_UNLOCK(txr); IGB_CORE_LOCK(adapter); - selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET); + selwakeuppri(&na->tx_si, PI_NET); IGB_CORE_UNLOCK(adapter); IGB_TX_LOCK(txr); return FALSE; @@ -4057,7 +4057,7 @@ igb_setup_receive_ring(struct rx_ring *rxr) #ifdef DEV_NETMAP if (slot) { /* slot sj is mapped to the i-th NIC-ring entry */ - int sj = netmap_ridx_n2k(na, rxr->me, j); + int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; @@ -4554,10 +4554,11 @@ igb_rxeof(struct igb_queue *que, int count, int *done) if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(ifp); + na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); IGB_RX_UNLOCK(rxr); IGB_CORE_LOCK(adapter); - selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET); + selwakeuppri(&na->rx_si, PI_NET); IGB_CORE_UNLOCK(adapter); return (0); } diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c index be4626054a53..b5bca6a8e4fb 100644 --- a/sys/dev/e1000/if_lem.c +++ b/sys/dev/e1000/if_lem.c @@ -2669,7 +2669,7 @@ lem_setup_transmit_structures(struct adapter *adapter) #ifdef DEV_NETMAP if (slot) { /* the i-th NIC entry goes to slot si */ - int si = netmap_tidx_n2k(na, 0, i); + int si = netmap_idx_n2k(&na->tx_rings[0], i); uint64_t paddr; void *addr; @@ -3243,7 +3243,7 @@ lem_setup_receive_structures(struct adapter *adapter) #ifdef DEV_NETMAP if (slot) { /* the i-th NIC entry goes to slot si */ - int si = netmap_ridx_n2k(na, 0, i); + int si = netmap_idx_n2k(&na->rx_rings[0], i); uint64_t paddr; void *addr; @@ -3475,7 +3475,9 @@ lem_rxeof(struct adapter *adapter, int count, int *done) #ifdef DEV_NETMAP if (ifp->if_capenable & IFCAP_NETMAP) { - selwakeuppri(&NA(ifp)->rx_rings[0].si, PI_NET); + struct netmap_adapter *na = NA(ifp); + na->rx_rings[0].nr_kflags |= NKR_PENDINTR; + selwakeuppri(&na->rx_rings[0].si, PI_NET); EM_RX_UNLOCK(adapter); return (0); } diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index 48a27b015bd5..befedc9dca17 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -2970,10 +2970,10 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr) * kring->nkr_hwofs positions "ahead" wrt the * corresponding slot in the NIC ring. In some drivers * (not here) nkr_hwofs can be negative. Function - * netmap_tidx_n2k() handles wraparounds properly. + * netmap_idx_n2k() handles wraparounds properly. 
*/ if (slot) { - int si = netmap_tidx_n2k(na, txr->me, i); + int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si)); } #endif /* DEV_NETMAP */ @@ -3491,7 +3491,7 @@ ixgbe_txeof(struct tx_ring *txr) selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); IXGBE_TX_UNLOCK(txr); IXGBE_CORE_LOCK(adapter); - selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET); + selwakeuppri(&na->tx_si, PI_NET); IXGBE_CORE_UNLOCK(adapter); IXGBE_TX_LOCK(txr); } @@ -3922,7 +3922,7 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) * an mbuf, so end the block with a continue; */ if (slot) { - int sj = netmap_ridx_n2k(na, rxr->me, j); + int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; @@ -4376,7 +4376,7 @@ ixgbe_rxeof(struct ix_queue *que, int count) selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); IXGBE_RX_UNLOCK(rxr); IXGBE_CORE_LOCK(adapter); - selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET); + selwakeuppri(&na->rx_si, PI_NET); IXGBE_CORE_UNLOCK(adapter); return (FALSE); } diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 70d92cf23b73..8a3893e3b1a4 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -25,45 +25,23 @@ /* * $FreeBSD$ - * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $ + * $Id: if_em_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * - * netmap support for if_em.c + * netmap support for em. * - * For structure and details on the individual functions please see - * ixgbe_netmap.h + * For more details on netmap support please see ixgbe_netmap.h */ + #include #include #include #include /* vtophys ? */ #include + static void em_netmap_block_tasks(struct adapter *); static void em_netmap_unblock_tasks(struct adapter *); -static int em_netmap_reg(struct ifnet *, int onoff); -static int em_netmap_txsync(struct ifnet *, u_int, int); -static int em_netmap_rxsync(struct ifnet *, u_int, int); -static void em_netmap_lock_wrapper(struct ifnet *, int, u_int); - - -static void -em_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.separate_locks = 1; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = em_netmap_txsync; - na.nm_rxsync = em_netmap_rxsync; - na.nm_lock = em_netmap_lock_wrapper; - na.nm_register = em_netmap_reg; - netmap_attach(&na, adapter->num_queues); -} static void @@ -137,7 +115,7 @@ em_netmap_unblock_tasks(struct adapter *adapter) /* - * register-unregister routine + * Register/unregister routine */ static int em_netmap_reg(struct ifnet *ifp, int onoff) @@ -180,17 +158,17 @@ em_netmap_reg(struct ifnet *ifp, int onoff) /* - * Reconcile hardware and user view of the transmit ring. + * Reconcile kernel and user view of the transmit ring. 
*/ static int em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = &adapter->tx_rings[ring_nr]; - struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; + u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; /* generate an interrupt approximately every half ring */ int report_frequency = kring->nkr_num_slots >> 1; @@ -204,16 +182,17 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); - /* check for new packets to send. - * j indexes the netmap ring, l indexes the nic ring, and - * j = kring->nr_hwcur, l = E1000_TDT (not tracked), - * j == (l + kring->nkr_hwofs) % ring_size + /* + * Process new packets to send. j is the current index in the + * netmap ring, l is the corresponding index in the NIC ring. */ j = kring->nr_hwcur; - if (j != k) { /* we have packets to send */ - l = netmap_tidx_k2n(na, ring_nr, j); + if (j != k) { /* we have new packets to send */ + l = netmap_idx_k2n(kring, j); for (n = 0; j != k; n++) { + /* slot is the current slot in the netmap ring */ struct netmap_slot *slot = &ring->slot[j]; + /* curr is the current slot in the nic ring */ struct e1000_tx_desc *curr = &txr->tx_base[l]; struct em_buffer *txbuf = &txr->tx_buffers[l]; int flags = ((slot->flags & NS_REPORT) || @@ -221,7 +200,7 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_TXD_CMD_RS : 0; uint64_t paddr; void *addr = PNMB(slot, &paddr); - int len = slot->len; + u_int len = slot->len; if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) { if (do_lock) @@ -230,25 +209,21 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) } slot->flags &= ~NS_REPORT; - curr->upper.data = 0; - curr->lower.data = - htole32(adapter->txd_cmd | len | - (E1000_TXD_CMD_EOP | flags) ); if (slot->flags & NS_BUF_CHANGED) { curr->buffer_addr = htole64(paddr); /* buffer has changed, reload map */ netmap_reload_map(txr->txtag, txbuf->map, addr); slot->flags &= ~NS_BUF_CHANGED; } - + curr->upper.data = 0; + curr->lower.data = htole32(adapter->txd_cmd | len | + (E1000_TXD_CMD_EOP | flags) ); bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; l = (l == lim) ? 
0 : l + 1; } - kring->nr_hwcur = k; - - /* decrease avail by number of sent packets */ + kring->nr_hwcur = k; /* the saved ring->cur */ kring->nr_hwavail -= n; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, @@ -275,7 +250,7 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) kring->nr_hwavail += delta; } } - /* update avail to what the hardware knows */ + /* update avail to what the kernel knows */ ring->avail = kring->nr_hwavail; if (do_lock) @@ -292,10 +267,12 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) { struct adapter *adapter = ifp->if_softc; struct rx_ring *rxr = &adapter->rx_rings[ring_nr]; - struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n, lim = kring->nkr_num_slots - 1; + u_int j, l, n, lim = kring->nkr_num_slots - 1; + int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR; + u_int k = ring->cur, resvd = ring->reserved; k = ring->cur; if (k > lim) @@ -308,37 +285,45 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - /* import newly received packets into the netmap ring. - * j is an index in the netmap ring, l in the NIC ring, and - * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size - * l = rxr->next_to_check; - * and - * j == (l + kring->nkr_hwofs) % ring_size + /* + * Import newly received packets into the netmap ring. + * j is an index in the netmap ring, l in the NIC ring. */ l = rxr->next_to_check; - j = netmap_ridx_n2k(na, ring_nr, l); - for (n = 0; ; n++) { - struct e1000_rx_desc *curr = &rxr->rx_base[l]; + j = netmap_idx_n2k(kring, l); + if (netmap_no_pendintr || force_update) { + for (n = 0; ; n++) { + struct e1000_rx_desc *curr = &rxr->rx_base[l]; + uint32_t staterr = le32toh(curr->status); - if ((curr->status & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[j].len = le16toh(curr->length); - bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map, - BUS_DMASYNC_POSTREAD); - j = (j == lim) ? 0 : j + 1; - /* make sure next_to_refresh follows next_to_check */ - rxr->next_to_refresh = l; // XXX - l = (l == lim) ? 0 : l + 1; - } - if (n) { - rxr->next_to_check = l; - kring->nr_hwavail += n; + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + ring->slot[j].len = le16toh(curr->length); + bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map, + BUS_DMASYNC_POSTREAD); + j = (j == lim) ? 0 : j + 1; + /* make sure next_to_refresh follows next_to_check */ + rxr->next_to_refresh = l; // XXX + l = (l == lim) ? 0 : l + 1; + } + if (n) { /* update the state variables */ + rxr->next_to_check = l; + kring->nr_hwavail += n; + } + kring->nr_kflags &= ~NKR_PENDINTR; } - /* skip past packets that userspace has already processed */ + /* skip past packets that userspace has released */ j = kring->nr_hwcur; /* netmap ring index */ - if (j != k) { /* userspace has read some packets. */ - l = netmap_ridx_k2n(na, ring_nr, j); /* NIC ring index */ + if (resvd > 0) { + if (resvd + ring->avail >= lim + 1) { + D("XXX invalid reserve/avail %d %d", resvd, ring->avail); + ring->reserved = resvd = 0; // XXX panic... + } + k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; + } + if (j != k) { /* userspace has released some packets. 
*/ + l = netmap_idx_k2n(kring, j); /* NIC ring index */ for (n = 0; j != k; n++) { struct netmap_slot *slot = &ring->slot[j]; struct e1000_rx_desc *curr = &rxr->rx_base[l]; @@ -352,17 +337,15 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) return netmap_ring_reinit(kring); } - curr->status = 0; if (slot->flags & NS_BUF_CHANGED) { curr->buffer_addr = htole64(paddr); /* buffer has changed, reload map */ netmap_reload_map(rxr->rxtag, rxbuf->map, addr); slot->flags &= ~NS_BUF_CHANGED; } - + curr->status = 0; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); - j = (j == lim) ? 0 : j + 1; l = (l == lim) ? 0 : l + 1; } @@ -378,9 +361,29 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l); } /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail ; + ring->avail = kring->nr_hwavail - resvd; if (do_lock) EM_RX_UNLOCK(rxr); return 0; } + + +static void +em_netmap_attach(struct adapter *adapter) +{ + struct netmap_adapter na; + + bzero(&na, sizeof(na)); + + na.ifp = adapter->ifp; + na.separate_locks = 1; + na.num_tx_desc = adapter->num_tx_desc; + na.num_rx_desc = adapter->num_rx_desc; + na.nm_txsync = em_netmap_txsync; + na.nm_rxsync = em_netmap_rxsync; + na.nm_lock = em_netmap_lock_wrapper; + na.nm_register = em_netmap_reg; + netmap_attach(&na, adapter->num_queues); +} + /* end of file */ diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index 3d37b698e526..8ba13ee6af61 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -25,41 +25,19 @@ /* * $FreeBSD$ - * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $ + * $Id: if_igb_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * - * netmap modifications for igb contributed by Ahmed Kooli + * Netmap support for igb, partly contributed by Ahmed Kooli + * For details on netmap support please see ixgbe_netmap.h */ + #include #include #include #include /* vtophys ? */ #include -static int igb_netmap_reg(struct ifnet *, int onoff); -static int igb_netmap_txsync(struct ifnet *, u_int, int); -static int igb_netmap_rxsync(struct ifnet *, u_int, int); -static void igb_netmap_lock_wrapper(struct ifnet *, int, u_int); - - -static void -igb_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.separate_locks = 1; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = igb_netmap_txsync; - na.nm_rxsync = igb_netmap_rxsync; - na.nm_lock = igb_netmap_lock_wrapper; - na.nm_register = igb_netmap_reg; - netmap_attach(&na, adapter->num_queues); -} - /* * wrapper to export locks to the generic code @@ -134,17 +112,17 @@ igb_netmap_reg(struct ifnet *ifp, int onoff) /* - * Reconcile hardware and user view of the transmit ring. + * Reconcile kernel and user view of the transmit ring. 
*/ static int igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = &adapter->tx_rings[ring_nr]; - struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; + u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; /* generate an interrupt approximately every half ring */ int report_frequency = kring->nkr_num_slots >> 1; @@ -164,14 +142,16 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) * j == (l + kring->nkr_hwofs) % ring_size */ j = kring->nr_hwcur; - if (j != k) { /* we have packets to send */ + if (j != k) { /* we have new packets to send */ /* 82575 needs the queue index added */ u32 olinfo_status = (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0; - l = netmap_tidx_k2n(na, ring_nr, j); + l = netmap_idx_k2n(kring, j); for (n = 0; j != k; n++) { + /* slot is the current slot in the netmap ring */ struct netmap_slot *slot = &ring->slot[j]; + /* curr is the current slot in the nic ring */ union e1000_adv_tx_desc *curr = (union e1000_adv_tx_desc *)&txr->tx_base[l]; struct igb_tx_buffer *txbuf = &txr->tx_buffers[l]; @@ -180,7 +160,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_ADVTXD_DCMD_RS : 0; uint64_t paddr; void *addr = PNMB(slot, &paddr); - int len = slot->len; + u_int len = slot->len; if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) { if (do_lock) @@ -189,8 +169,13 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) } slot->flags &= ~NS_REPORT; - // XXX set the address unconditionally + if (slot->flags & NS_BUF_CHANGED) { + /* buffer has changed, reload map */ + netmap_reload_map(txr->txtag, txbuf->map, addr); + slot->flags &= ~NS_BUF_CHANGED; + } curr->read.buffer_addr = htole64(paddr); + // XXX check olinfo and cmd_type_len curr->read.olinfo_status = htole32(olinfo_status | (len<< E1000_ADVTXD_PAYLEN_SHIFT)); @@ -199,20 +184,13 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_EOP | flags); - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(txr->txtag, txbuf->map, addr); - slot->flags &= ~NS_BUF_CHANGED; - } bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; l = (l == lim) ? 0 : l + 1; } - kring->nr_hwcur = k; - - /* decrease avail by number of sent packets */ + kring->nr_hwcur = k; /* the saved ring->cur */ kring->nr_hwavail -= n; /* Set the watchdog XXX ? 
*/ @@ -243,7 +221,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) kring->nr_hwavail += delta; } } - /* update avail to what the hardware knows */ + /* update avail to what the kernel knows */ ring->avail = kring->nr_hwavail; if (do_lock) @@ -260,10 +238,12 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) { struct adapter *adapter = ifp->if_softc; struct rx_ring *rxr = &adapter->rx_rings[ring_nr]; - struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n, lim = kring->nkr_num_slots - 1; + u_int j, l, n, lim = kring->nkr_num_slots - 1; + int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR; + u_int k = ring->cur, resvd = ring->reserved; k = ring->cur; if (k > lim) @@ -276,36 +256,43 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - /* import newly received packets into the netmap ring. - * j is an index in the netmap ring, l in the NIC ring, and - * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size - * l = rxr->next_to_check; - * and - * j == (l + kring->nkr_hwofs) % ring_size + /* + * import newly received packets into the netmap ring. + * j is an index in the netmap ring, l in the NIC ring. */ l = rxr->next_to_check; - j = netmap_ridx_n2k(na, ring_nr, l); - for (n = 0; ; n++) { - union e1000_adv_rx_desc *curr = &rxr->rx_base[l]; - uint32_t staterr = le32toh(curr->wb.upper.status_error); + j = netmap_idx_n2k(kring, l); + if (netmap_no_pendintr || force_update) { + for (n = 0; ; n++) { + union e1000_adv_rx_desc *curr = &rxr->rx_base[l]; + uint32_t staterr = le32toh(curr->wb.upper.status_error); - if ((staterr & E1000_RXD_STAT_DD) == 0) - break; - ring->slot[j].len = le16toh(curr->wb.upper.length); - bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD); - j = (j == lim) ? 0 : j + 1; - l = (l == lim) ? 0 : l + 1; - } - if (n) { - rxr->next_to_check = l; - kring->nr_hwavail += n; + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + ring->slot[j].len = le16toh(curr->wb.upper.length); + bus_dmamap_sync(rxr->ptag, + rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD); + j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; + } + if (n) { /* update the state variables */ + rxr->next_to_check = l; + kring->nr_hwavail += n; + } + kring->nr_kflags &= ~NKR_PENDINTR; } - /* skip past packets that userspace has already processed */ - j = kring->nr_hwcur; - if (j != k) { /* userspace has read some packets. */ - l = netmap_ridx_k2n(na, ring_nr, j); + /* skip past packets that userspace has released */ + j = kring->nr_hwcur; /* netmap ring index */ + if (resvd > 0) { + if (resvd + ring->avail >= lim + 1) { + D("XXX invalid reserve/avail %d %d", resvd, ring->avail); + ring->reserved = resvd = 0; // XXX panic... + } + k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; + } + if (j != k) { /* userspace has released some packets. 
*/ + l = netmap_idx_k2n(kring, j); for (n = 0; j != k; n++) { struct netmap_slot *slot = ring->slot + j; union e1000_adv_rx_desc *curr = &rxr->rx_base[l]; @@ -319,16 +306,14 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) return netmap_ring_reinit(kring); } - curr->wb.upper.status_error = 0; - curr->read.pkt_addr = htole64(paddr); if (slot->flags & NS_BUF_CHANGED) { netmap_reload_map(rxr->ptag, rxbuf->pmap, addr); slot->flags &= ~NS_BUF_CHANGED; } - + curr->read.pkt_addr = htole64(paddr); + curr->wb.upper.status_error = 0; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); - j = (j == lim) ? 0 : j + 1; l = (l == lim) ? 0 : l + 1; } @@ -344,9 +329,28 @@ igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l); } /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail ; + ring->avail = kring->nr_hwavail - resvd; if (do_lock) IGB_RX_UNLOCK(rxr); return 0; } + + +static void +igb_netmap_attach(struct adapter *adapter) +{ + struct netmap_adapter na; + + bzero(&na, sizeof(na)); + + na.ifp = adapter->ifp; + na.separate_locks = 1; + na.num_tx_desc = adapter->num_tx_desc; + na.num_rx_desc = adapter->num_rx_desc; + na.nm_txsync = igb_netmap_txsync; + na.nm_rxsync = igb_netmap_rxsync; + na.nm_lock = igb_netmap_lock_wrapper; + na.nm_register = igb_netmap_reg; + netmap_attach(&na, adapter->num_queues); +} /* end of file */ diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h index 01823189c854..9f24580a9e8d 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -23,14 +23,14 @@ * SUCH DAMAGE. */ + /* * $FreeBSD$ - * $Id: if_lem_netmap.h 9802 2011-12-02 18:42:37Z luigi $ + * $Id: if_lem_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * - * netmap support for if_lem.c + * netmap support for "lem" * - * For structure and details on the individual functions please see - * ixgbe_netmap.h + * For details on netmap support please see ixgbe_netmap.h */ #include @@ -39,30 +39,6 @@ #include /* vtophys ? */ #include -static int lem_netmap_reg(struct ifnet *, int onoff); -static int lem_netmap_txsync(struct ifnet *, u_int, int); -static int lem_netmap_rxsync(struct ifnet *, u_int, int); -static void lem_netmap_lock_wrapper(struct ifnet *, int, u_int); - - -static void -lem_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.separate_locks = 1; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = lem_netmap_txsync; - na.nm_rxsync = lem_netmap_rxsync; - na.nm_lock = lem_netmap_lock_wrapper; - na.nm_register = lem_netmap_reg; - netmap_attach(&na, 1); -} - static void lem_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int ringid) @@ -94,7 +70,7 @@ lem_netmap_lock_wrapper(struct ifnet *ifp, int what, u_int ringid) /* - * register-unregister routine + * Register/unregister */ static int lem_netmap_reg(struct ifnet *ifp, int onoff) @@ -104,7 +80,7 @@ lem_netmap_reg(struct ifnet *ifp, int onoff) int error = 0; if (na == NULL) - return EINVAL; /* no netmap support here */ + return EINVAL; lem_disable_intr(adapter); @@ -144,20 +120,21 @@ lem_netmap_reg(struct ifnet *ifp, int onoff) /* - * Reconcile hardware and user view of the transmit ring. + * Reconcile kernel and user view of the transmit ring. 
*/ static int lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) { struct adapter *adapter = ifp->if_softc; - struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; + u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; /* generate an interrupt approximately every half ring */ int report_frequency = kring->nkr_num_slots >> 1; + /* take a copy of ring->cur now, and never read it again */ k = ring->cur; if (k > lim) return netmap_ring_reinit(kring); @@ -166,17 +143,17 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) EM_TX_LOCK(adapter); bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_POSTREAD); - - /* check for new packets to send. - * j indexes the netmap ring, l indexes the nic ring, and - * j = kring->nr_hwcur, l = E1000_TDT (not tracked), - * j == (l + kring->nkr_hwofs) % ring_size + /* + * Process new packets to send. j is the current index in the + * netmap ring, l is the corresponding index in the NIC ring. */ j = kring->nr_hwcur; - if (j != k) { /* we have packets to send */ - l = netmap_tidx_k2n(na, ring_nr, j); + if (j != k) { /* we have new packets to send */ + l = netmap_idx_k2n(kring, j); for (n = 0; j != k; n++) { + /* slot is the current slot in the netmap ring */ struct netmap_slot *slot = &ring->slot[j]; + /* curr is the current slot in the nic ring */ struct e1000_tx_desc *curr = &adapter->tx_desc_base[l]; struct em_buffer *txbuf = &adapter->tx_buffer_area[l]; int flags = ((slot->flags & NS_REPORT) || @@ -184,7 +161,7 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_TXD_CMD_RS : 0; uint64_t paddr; void *addr = PNMB(slot, &paddr); - int len = slot->len; + u_int len = slot->len; if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) { if (do_lock) @@ -193,25 +170,23 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) } slot->flags &= ~NS_REPORT; + if (slot->flags & NS_BUF_CHANGED) { + /* buffer has changed, reload map */ + netmap_reload_map(adapter->txtag, txbuf->map, addr); + curr->buffer_addr = htole64(paddr); + slot->flags &= ~NS_BUF_CHANGED; + } curr->upper.data = 0; curr->lower.data = htole32( adapter->txd_cmd | len | (E1000_TXD_CMD_EOP | flags) ); - if (slot->flags & NS_BUF_CHANGED) { - curr->buffer_addr = htole64(paddr); - /* buffer has changed, reload map */ - netmap_reload_map(adapter->txtag, txbuf->map, addr); - slot->flags &= ~NS_BUF_CHANGED; - } bus_dmamap_sync(adapter->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; l = (l == lim) ? 0 : l + 1; } - kring->nr_hwcur = k; - - /* decrease avail by number of sent packets */ + kring->nr_hwcur = k; /* the saved ring->cur */ kring->nr_hwavail -= n; bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, @@ -231,14 +206,14 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) } delta = l - adapter->next_tx_to_clean; if (delta) { - /* some completed, increment hwavail. */ + /* some tx completed, increment hwavail. 
*/ if (delta < 0) delta += kring->nkr_num_slots; adapter->next_tx_to_clean = l; kring->nr_hwavail += delta; } } - /* update avail to what the hardware knows */ + /* update avail to what the kernel knows */ ring->avail = kring->nr_hwavail; if (do_lock) @@ -254,12 +229,13 @@ static int lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) { struct adapter *adapter = ifp->if_softc; - struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n, lim = kring->nkr_num_slots - 1; + int j, l, n, lim = kring->nkr_num_slots - 1; + int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR; + u_int k = ring->cur, resvd = ring->reserved; - k = ring->cur; if (k > lim) return netmap_ring_reinit(kring); @@ -270,42 +246,50 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - /* import newly received packets into the netmap ring - * j is an index in the netmap ring, l in the NIC ring, and - * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size - * l = rxr->next_to_check; - * and - * j == (l + kring->nkr_hwofs) % ring_size + /* + * Import newly received packets into the netmap ring. + * j is an index in the netmap ring, l in the NIC ring. */ l = adapter->next_rx_desc_to_check; - j = netmap_ridx_n2k(na, ring_nr, l); - for (n = 0; ; n++) { - struct e1000_rx_desc *curr = &adapter->rx_desc_base[l]; - int len; + j = netmap_idx_n2k(kring, l); + if (netmap_no_pendintr || force_update) { + for (n = 0; ; n++) { + struct e1000_rx_desc *curr = &adapter->rx_desc_base[l]; + uint32_t staterr = le32toh(curr->status); + int len; - if ((curr->status & E1000_RXD_STAT_DD) == 0) - break; - len = le16toh(curr->length) - 4; // CRC - - if (len < 0) { - D("bogus pkt size at %d", j); - len = 0; + if ((staterr & E1000_RXD_STAT_DD) == 0) + break; + len = le16toh(curr->length) - 4; // CRC + if (len < 0) { + D("bogus pkt size at %d", j); + len = 0; + } + ring->slot[j].len = len; + bus_dmamap_sync(adapter->rxtag, + adapter->rx_buffer_area[l].map, + BUS_DMASYNC_POSTREAD); + j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; } - ring->slot[j].len = len; - bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[l].map, - BUS_DMASYNC_POSTREAD); - j = (j == lim) ? 0 : j + 1; - l = (l == lim) ? 0 : l + 1; - } - if (n) { - adapter->next_rx_desc_to_check = l; - kring->nr_hwavail += n; + if (n) { /* update the state variables */ + adapter->next_rx_desc_to_check = l; + kring->nr_hwavail += n; + } + kring->nr_kflags &= ~NKR_PENDINTR; } - /* skip past packets that userspace has already processed */ + /* skip past packets that userspace has released */ j = kring->nr_hwcur; /* netmap ring index */ - if (j != k) { /* userspace has read some packets. */ - l = netmap_ridx_k2n(na, ring_nr, j); /* NIC ring index */ + if (resvd > 0) { + if (resvd + ring->avail >= lim + 1) { + D("XXX invalid reserve/avail %d %d", resvd, ring->avail); + ring->reserved = resvd = 0; // XXX panic... + } + k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; + } + if (j != k) { /* userspace has released some packets. 
*/ + l = netmap_idx_k2n(kring, j); /* NIC ring index */ for (n = 0; j != k; n++) { struct netmap_slot *slot = &ring->slot[j]; struct e1000_rx_desc *curr = &adapter->rx_desc_base[l]; @@ -319,13 +303,13 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) return netmap_ring_reinit(kring); } - curr->status = 0; if (slot->flags & NS_BUF_CHANGED) { - curr->buffer_addr = htole64(paddr); /* buffer has changed, reload map */ netmap_reload_map(adapter->rxtag, rxbuf->map, addr); + curr->buffer_addr = htole64(paddr); slot->flags &= ~NS_BUF_CHANGED; } + curr->status = 0; bus_dmamap_sync(adapter->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); @@ -345,9 +329,29 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l); } /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail ; + ring->avail = kring->nr_hwavail - resvd; if (do_lock) EM_RX_UNLOCK(adapter); return 0; } + + +static void +lem_netmap_attach(struct adapter *adapter) +{ + struct netmap_adapter na; + + bzero(&na, sizeof(na)); + + na.ifp = adapter->ifp; + na.separate_locks = 1; + na.num_tx_desc = adapter->num_tx_desc; + na.num_rx_desc = adapter->num_rx_desc; + na.nm_txsync = lem_netmap_txsync; + na.nm_rxsync = lem_netmap_rxsync; + na.nm_lock = lem_netmap_lock_wrapper; + na.nm_register = lem_netmap_reg; + netmap_attach(&na, 1); +} + /* end of file */ diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index 1ad530794acf..9984186db5e6 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -25,40 +25,19 @@ /* * $FreeBSD$ - * $Id: if_re_netmap.h 10075 2011-12-25 22:55:48Z luigi $ + * $Id: if_re_netmap.h 10609 2012-02-22 19:44:58Z luigi $ * - * netmap support for if_re + * netmap support for "re" + * For details on netmap support please see ixgbe_netmap.h */ + #include #include #include #include /* vtophys ? */ #include -static int re_netmap_reg(struct ifnet *, int onoff); -static int re_netmap_txsync(struct ifnet *, u_int, int); -static int re_netmap_rxsync(struct ifnet *, u_int, int); -static void re_netmap_lock_wrapper(struct ifnet *, int, u_int); - -static void -re_netmap_attach(struct rl_softc *sc) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = sc->rl_ifp; - na.separate_locks = 0; - na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt; - na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt; - na.nm_txsync = re_netmap_txsync; - na.nm_rxsync = re_netmap_rxsync; - na.nm_lock = re_netmap_lock_wrapper; - na.nm_register = re_netmap_reg; - netmap_attach(&na, 1); -} - /* * wrapper to export locks to the generic code @@ -170,7 +149,7 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) kring->nr_hwavail += n; } - /* update avail to what the hardware knows */ + /* update avail to what the kernel knows */ ring->avail = kring->nr_hwavail; j = kring->nr_hwcur; @@ -211,10 +190,8 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) l = (l == lim) ? 
0 : l + 1; } sc->rl_ldata.rl_tx_prodidx = l; - kring->nr_hwcur = k; - - /* decrease avail by number of sent packets */ - ring->avail -= n; + kring->nr_hwcur = k; /* the saved ring->cur */ + ring->avail -= n; // XXX see others kring->nr_hwavail = ring->avail; bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, @@ -241,7 +218,9 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(sc->rl_ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n, lim = kring->nkr_num_slots - 1; + int j, l, n, lim = kring->nkr_num_slots - 1; + int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR; + u_int k = ring->cur, resvd = ring->reserved; k = ring->cur; if (k > lim) @@ -255,45 +234,53 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* + * Import newly received packets into the netmap ring. + * j is an index in the netmap ring, l in the NIC ring. + * * The device uses all the buffers in the ring, so we need * another termination condition in addition to RL_RDESC_STAT_OWN * cleared (all buffers could have it cleared. The easiest one * is to limit the amount of data reported up to 'lim' */ l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */ - j = netmap_ridx_n2k(na, ring_nr, l); /* the kring index */ - for (n = kring->nr_hwavail; n < lim ; n++) { - struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l]; - uint32_t rxstat = le32toh(cur_rx->rl_cmdstat); - uint32_t total_len; + j = netmap_idx_n2k(kring, l); /* the kring index */ + if (netmap_no_pendintr || force_update) { + for (n = kring->nr_hwavail; n < lim ; n++) { + struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l]; + uint32_t rxstat = le32toh(cur_rx->rl_cmdstat); + uint32_t total_len; - if ((rxstat & RL_RDESC_STAT_OWN) != 0) - break; - total_len = rxstat & sc->rl_rxlenmask; - /* XXX subtract crc */ - total_len = (total_len < 4) ? 0 : total_len - 4; - kring->ring->slot[j].len = total_len; - /* sync was in re_newbuf() */ - bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD); - j = (j == lim) ? 0 : j + 1; - l = (l == lim) ? 0 : l + 1; - } - if (n != kring->nr_hwavail) { - sc->rl_ldata.rl_rx_prodidx = l; - sc->rl_ifp->if_ipackets += n - kring->nr_hwavail; - kring->nr_hwavail = n; + if ((rxstat & RL_RDESC_STAT_OWN) != 0) + break; + total_len = rxstat & sc->rl_rxlenmask; + /* XXX subtract crc */ + total_len = (total_len < 4) ? 0 : total_len - 4; + kring->ring->slot[j].len = total_len; + /* sync was in re_newbuf() */ + bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, + rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD); + j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; + } + if (n != kring->nr_hwavail) { + sc->rl_ldata.rl_rx_prodidx = l; + sc->rl_ifp->if_ipackets += n - kring->nr_hwavail; + kring->nr_hwavail = n; + } + kring->nr_kflags &= ~NKR_PENDINTR; } - /* skip past packets that userspace has already processed, - * making them available for reception. - * advance nr_hwcur and issue a bus_dmamap_sync on the - * buffers so it is safe to write to them. - * Also increase nr_hwavail - */ + /* skip past packets that userspace has released */ j = kring->nr_hwcur; - if (j != k) { /* userspace has read some packets. */ - l = netmap_ridx_k2n(na, ring_nr, j); /* the NIC index */ + if (resvd > 0) { + if (resvd + ring->avail >= lim + 1) { + D("XXX invalid reserve/avail %d %d", resvd, ring->avail); + ring->reserved = resvd = 0; // XXX panic... + } + k = (k >= resvd) ? 
k - resvd : k + lim + 1 - resvd; + } + if (j != k) { /* userspace has released some packets. */ + l = netmap_idx_k2n(kring, j); /* the NIC index */ for (n = 0; j != k; n++) { struct netmap_slot *slot = ring->slot + j; struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l]; @@ -310,15 +297,15 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) if (l == lim) /* mark end of ring */ cmd |= RL_RDESC_CMD_EOR; - desc->rl_cmdstat = htole32(cmd); slot->flags &= ~NS_REPORT; if (slot->flags & NS_BUF_CHANGED) { - desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); - desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); netmap_reload_map(sc->rl_ldata.rl_rx_mtag, rxd[l].rx_dmamap, addr); + desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); + desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); slot->flags &= ~NS_BUF_CHANGED; } + desc->rl_cmdstat = htole32(cmd); bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; @@ -333,7 +320,7 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); } /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail; + ring->avail = kring->nr_hwavail - resvd; if (do_lock) RL_UNLOCK(sc); return 0; @@ -363,7 +350,7 @@ re_netmap_tx_init(struct rl_softc *sc) /* l points in the netmap ring, i points in the NIC ring */ for (i = 0; i < n; i++) { uint64_t paddr; - int l = netmap_tidx_n2k(na, 0, i); + int l = netmap_idx_n2k(&na->tx_rings[0], i); void *addr = PNMB(slot + l, &paddr); desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); @@ -394,7 +381,7 @@ re_netmap_rx_init(struct rl_softc *sc) for (i = 0; i < n; i++) { void *addr; uint64_t paddr; - int l = netmap_ridx_n2k(na, 0, i); + int l = netmap_idx_n2k(&na->rx_rings[0], i); addr = PNMB(slot + l, &paddr); @@ -412,3 +399,23 @@ re_netmap_rx_init(struct rl_softc *sc) desc[i].rl_cmdstat = htole32(cmdstat); } } + + +static void +re_netmap_attach(struct rl_softc *sc) +{ + struct netmap_adapter na; + + bzero(&na, sizeof(na)); + + na.ifp = sc->rl_ifp; + na.separate_locks = 0; + na.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt; + na.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt; + na.nm_txsync = re_netmap_txsync; + na.nm_rxsync = re_netmap_rxsync; + na.nm_lock = re_netmap_lock_wrapper; + na.nm_register = re_netmap_reg; + netmap_attach(&na, 1); +} +/* end of file */ diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index b0a203ae94c1..c71111041ab7 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: ixgbe_netmap.h 9802 2011-12-02 18:42:37Z luigi $ + * $Id: ixgbe_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap modifications for ixgbe * @@ -47,44 +47,8 @@ #include */ - #include -/* - * prototypes for the new API calls that are used by the - * *_netmap_attach() routine. - */ -static int ixgbe_netmap_reg(struct ifnet *, int onoff); -static int ixgbe_netmap_txsync(struct ifnet *, u_int, int); -static int ixgbe_netmap_rxsync(struct ifnet *, u_int, int); -static void ixgbe_netmap_lock_wrapper(struct ifnet *, int, u_int); - - -/* - * The attach routine, called near the end of ixgbe_attach(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. 
- */ -static void -ixgbe_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.separate_locks = 1; /* this card has separate rx/tx locks */ - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = ixgbe_netmap_txsync; - na.nm_rxsync = ixgbe_netmap_rxsync; - na.nm_lock = ixgbe_netmap_lock_wrapper; - na.nm_register = ixgbe_netmap_reg; - netmap_attach(&na, adapter->num_queues); -} - /* * wrapper to export locks to the generic netmap code. @@ -119,7 +83,7 @@ ixgbe_netmap_lock_wrapper(struct ifnet *_a, int what, u_int queueid) /* - * Netmap register/unregister. We are already under core lock. + * Register/unregister. We are already under core lock. * Only called on the first register or the last unregister. */ static int @@ -129,8 +93,8 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff) struct netmap_adapter *na = NA(ifp); int error = 0; - if (!na) /* probably, netmap_attach() failed */ - return EINVAL; + if (na == NULL) + return EINVAL; /* no netmap support here */ ixgbe_disable_intr(adapter); @@ -197,7 +161,7 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; + u_int j, k = ring->cur, l, n = 0, lim = kring->nkr_num_slots - 1; /* * ixgbe can generate an interrupt on every tx packet, but it @@ -206,20 +170,10 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) */ int report_frequency = kring->nkr_num_slots >> 1; + if (k > lim) + return netmap_ring_reinit(kring); if (do_lock) IXGBE_TX_LOCK(txr); - /* take a copy of ring->cur now, and never read it again */ - k = ring->cur; - /* do a sanity check on cur - hwcur XXX verify */ - l = k - kring->nr_hwcur; - if (l < 0) - l += lim + 1; - /* if cur is invalid reinitialize the ring. */ - if (k > lim || l > kring->nr_hwavail) { - if (do_lock) - IXGBE_TX_UNLOCK(txr); - return netmap_ring_reinit(kring); - } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); @@ -241,7 +195,9 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) */ j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ - l = netmap_tidx_k2n(na, ring_nr, j); /* NIC index */ + prefetch(&ring->slot[j]); + l = netmap_idx_k2n(kring, j); /* NIC index */ + prefetch(&txr->tx_buffers[l]); for (n = 0; j != k; n++) { /* * Collect per-slot info. @@ -253,17 +209,25 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) * Many other drivers preserve the address, so * we only need to access it if NS_BUF_CHANGED * is set. + * XXX note, on this device the dmamap* calls are + * not necessary because tag is 0, however just accessing + * the per-packet tag kills 1Mpps at 900 MHz. */ struct netmap_slot *slot = &ring->slot[j]; - struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l]; union ixgbe_adv_tx_desc *curr = &txr->tx_base[l]; + struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l]; uint64_t paddr; - void *addr = PNMB(slot, &paddr); // XXX type for flags and len ? int flags = ((slot->flags & NS_REPORT) || j == 0 || j == report_frequency) ? IXGBE_TXD_CMD_RS : 0; - int len = slot->len; + u_int len = slot->len; + void *addr = PNMB(slot, &paddr); + + j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; + prefetch(&ring->slot[j]); + prefetch(&txr->tx_buffers[l]); /* * Quick check for valid addr and len. 
@@ -279,35 +243,29 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) return netmap_ring_reinit(kring); } - slot->flags &= ~NS_REPORT; if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, unload and reload map */ netmap_reload_map(txr->txtag, txbuf->map, addr); slot->flags &= ~NS_BUF_CHANGED; } + slot->flags &= ~NS_REPORT; /* * Fill the slot in the NIC ring. * In this driver we need to rewrite the buffer * address in the NIC ring. Other drivers do not * need this. + * Use legacy descriptor, it is faster. */ curr->read.buffer_addr = htole64(paddr); - curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT); - curr->read.cmd_type_len = - htole32(txr->txd_cmd | len | - (IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_DEXT | - IXGBE_ADVTXD_DCMD_IFCS | - IXGBE_TXD_CMD_EOP | flags) ); + curr->read.olinfo_status = 0; + curr->read.cmd_type_len = htole32(len | flags | + IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP); /* make sure changes to the buffer are synced */ - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); - j = (j == lim) ? 0 : j + 1; - l = (l == lim) ? 0 : l + 1; + bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); } kring->nr_hwcur = k; /* the saved ring->cur */ - /* decrease avail by number of sent packets */ + /* decrease avail by number of packets sent */ kring->nr_hwavail -= n; /* synchronize the NIC ring */ @@ -416,20 +374,15 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, l, n, lim = kring->nkr_num_slots - 1; + u_int j, l, n, lim = kring->nkr_num_slots - 1; int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR; + u_int k = ring->cur, resvd = ring->reserved; - k = ring->cur; /* cache and check value, same as in txsync */ - n = k - kring->nr_hwcur; - if (n < 0) - n += lim + 1; - if (k > lim || n > kring->nr_hwavail) /* userspace is cheating */ + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) IXGBE_RX_LOCK(rxr); - if (n < 0) - n += lim + 1; /* XXX check sync modes */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); @@ -450,7 +403,7 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) * rxr->next_to_check is set to 0 on a ring reinit */ l = rxr->next_to_check; - j = netmap_ridx_n2k(na, ring_nr, l); + j = netmap_idx_n2k(kring, l); if (netmap_no_pendintr || force_update) { for (n = 0; ; n++) { @@ -473,15 +426,22 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) } /* - * Skip past packets that userspace has already processed - * (from kring->nr_hwcur to ring->cur excluded), and make - * the buffers available for reception. + * Skip past packets that userspace has released + * (from kring->nr_hwcur to ring->cur - ring->reserved excluded), + * and make the buffers available for reception. * As usual j is the index in the netmap ring, l is the index * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size */ j = kring->nr_hwcur; - if (j != k) { /* userspace has read some packets. */ - l = netmap_ridx_k2n(na, ring_nr, j); + if (resvd > 0) { + if (resvd + ring->avail >= lim + 1) { + D("XXX invalid reserve/avail %d %d", resvd, ring->avail); + ring->reserved = resvd = 0; // XXX panic... + } + k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd; + } + if (j != k) { /* userspace has released some packets. 
*/ + l = netmap_idx_k2n(kring, j); for (n = 0; j != k; n++) { /* collect per-slot info, with similar validations * and flag handling as in the txsync code. @@ -522,7 +482,7 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l); } /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail ; + ring->avail = kring->nr_hwavail - resvd; if (do_lock) IXGBE_RX_UNLOCK(rxr); @@ -533,4 +493,31 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) IXGBE_RX_UNLOCK(rxr); return netmap_ring_reinit(kring); } + + +/* + * The attach routine, called near the end of ixgbe_attach(), + * fills the parameters for netmap_attach() and calls it. + * It cannot fail, in the worst case (such as no memory) + * netmap mode will be disabled and the driver will only + * operate in standard mode. + */ +static void +ixgbe_netmap_attach(struct adapter *adapter) +{ + struct netmap_adapter na; + + bzero(&na, sizeof(na)); + + na.ifp = adapter->ifp; + na.separate_locks = 1; /* this card has separate rx/tx locks */ + na.num_tx_desc = adapter->num_tx_desc; + na.num_rx_desc = adapter->num_rx_desc; + na.nm_txsync = ixgbe_netmap_txsync; + na.nm_rxsync = ixgbe_netmap_rxsync; + na.nm_lock = ixgbe_netmap_lock_wrapper; + na.nm_register = ixgbe_netmap_reg; + netmap_attach(&na, adapter->num_queues); +} + /* end of file */ diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 8dc62d8fd4ef..ae9a599ee916 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -9,7 +9,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -87,10 +87,10 @@ MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map"); /* * lock and unlock for the netmap memory allocator */ -#define NMA_LOCK() mtx_lock(&netmap_mem_d->nm_mtx); -#define NMA_UNLOCK() mtx_unlock(&netmap_mem_d->nm_mtx); +#define NMA_LOCK() mtx_lock(&nm_mem->nm_mtx); +#define NMA_UNLOCK() mtx_unlock(&nm_mem->nm_mtx); struct netmap_mem_d; -static struct netmap_mem_d *netmap_mem_d; /* Our memory allocator. */ +static struct netmap_mem_d *nm_mem; /* Our memory allocator. */ u_int netmap_total_buffers; char *netmap_buffer_base; /* address of an invalid buffer */ @@ -254,10 +254,10 @@ struct netmap_mem_d { /* Shorthand to compute a netmap interface offset. */ #define netmap_if_offset(v) \ - ((char *) (v) - (char *) netmap_mem_d->nm_buffer) + ((char *) (v) - (char *) nm_mem->nm_buffer) /* .. and get a physical address given a memory offset */ #define netmap_ofstophys(o) \ - (vtophys(netmap_mem_d->nm_buffer) + (o)) + (vtophys(nm_mem->nm_buffer) + (o)) /*------ netmap memory allocator -------*/ @@ -265,7 +265,7 @@ struct netmap_mem_d { * Request for a chunk of memory. * * Memory objects are arranged into a list, hence we need to walk this - * list until we find an object with the needed amount of data free. 
+ * list until we find an object with the needed amount of data free. * This sounds like a completely inefficient implementation, but given * the fact that data allocation is done once, we can handle it * flawlessly. @@ -279,7 +279,7 @@ netmap_malloc(size_t size, __unused const char *msg) void *ret = NULL; NMA_LOCK(); - TAILQ_FOREACH(mem_obj, &netmap_mem_d->nm_molist, nmo_next) { + TAILQ_FOREACH(mem_obj, &nm_mem->nm_molist, nmo_next) { if (mem_obj->nmo_used != 0 || mem_obj->nmo_size < size) continue; @@ -295,7 +295,7 @@ netmap_malloc(size_t size, __unused const char *msg) mem_obj->nmo_size -= size; mem_obj->nmo_data = (char *) mem_obj->nmo_data + size; if (mem_obj->nmo_size == 0) { - TAILQ_REMOVE(&netmap_mem_d->nm_molist, mem_obj, + TAILQ_REMOVE(&nm_mem->nm_molist, mem_obj, nmo_next); free(mem_obj, M_NETMAP); } @@ -328,7 +328,7 @@ netmap_free(void *addr, const char *msg) } NMA_LOCK(); - TAILQ_FOREACH(cur, &netmap_mem_d->nm_molist, nmo_next) { + TAILQ_FOREACH(cur, &nm_mem->nm_molist, nmo_next) { if (cur->nmo_data == addr && cur->nmo_used) break; } @@ -345,7 +345,7 @@ netmap_free(void *addr, const char *msg) if present. */ prev = TAILQ_PREV(cur, netmap_mem_obj_h, nmo_next); if (prev && prev->nmo_used == 0) { - TAILQ_REMOVE(&netmap_mem_d->nm_molist, cur, nmo_next); + TAILQ_REMOVE(&nm_mem->nm_molist, cur, nmo_next); prev->nmo_size += cur->nmo_size; free(cur, M_NETMAP); cur = prev; @@ -354,7 +354,7 @@ netmap_free(void *addr, const char *msg) /* merge with the next one */ next = TAILQ_NEXT(cur, nmo_next); if (next && next->nmo_used == 0) { - TAILQ_REMOVE(&netmap_mem_d->nm_molist, next, nmo_next); + TAILQ_REMOVE(&nm_mem->nm_molist, next, nmo_next); cur->nmo_size += next->nmo_size; free(next, M_NETMAP); } @@ -374,21 +374,24 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na) { struct netmap_if *nifp; struct netmap_ring *ring; + struct netmap_kring *kring; char *buff; - u_int i, len, ofs; - u_int n = na->num_queues + 1; /* shorthand, include stack queue */ + u_int i, len, ofs, numdesc; + u_int nrx = na->num_rx_queues + 1; /* shorthand, include stack queue */ + u_int ntx = na->num_tx_queues + 1; /* shorthand, include stack queue */ /* * the descriptor is followed inline by an array of offsets * to the tx and rx rings in the shared memory region. */ - len = sizeof(struct netmap_if) + 2 * n * sizeof(ssize_t); + len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t); nifp = netmap_if_malloc(len); if (nifp == NULL) return (NULL); /* initialize base fields */ - *(int *)(uintptr_t)&nifp->ni_num_queues = na->num_queues; + *(int *)(uintptr_t)&nifp->ni_rx_queues = na->num_rx_queues; + *(int *)(uintptr_t)&nifp->ni_tx_queues = na->num_tx_queues; strncpy(nifp->ni_name, ifname, IFNAMSIZ); (na->refcount)++; /* XXX atomic ? we are under lock */ @@ -396,16 +399,15 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na) goto final; /* - * If this is the first instance, allocate the shadow rings and - * buffers for this card (one for each hw queue, one for the host). + * First instance. Allocate the netmap rings + * (one for each hw queue, one pair for the host). * The rings are contiguous, but have variable size. 
* The entire block is reachable at - * na->tx_rings[0].ring + * na->tx_rings[0] */ - - len = n * (2 * sizeof(struct netmap_ring) + - (na->num_tx_desc + na->num_rx_desc) * - sizeof(struct netmap_slot) ); + len = (ntx + nrx) * sizeof(struct netmap_ring) + + (ntx * na->num_tx_desc + nrx * na->num_rx_desc) * + sizeof(struct netmap_slot); buff = netmap_ring_malloc(len); if (buff == NULL) { D("failed to allocate %d bytes for %s shadow ring", @@ -415,9 +417,8 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na) netmap_if_free(nifp); return (NULL); } - /* do we have the bufers ? we are in need of num_tx_desc buffers for - * each tx ring and num_tx_desc buffers for each rx ring. */ - len = n * (na->num_tx_desc + na->num_rx_desc); + /* Check whether we have enough buffers */ + len = ntx * na->num_tx_desc + nrx * na->num_rx_desc; NMA_LOCK(); if (nm_buf_pool.free < len) { NMA_UNLOCK(); @@ -429,11 +430,7 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na) * and initialize the rings. We are under NMA_LOCK(). */ ofs = 0; - for (i = 0; i < n; i++) { - struct netmap_kring *kring; - int numdesc; - - /* Transmit rings */ + for (i = 0; i < ntx; i++) { /* Transmit rings */ kring = &na->tx_rings[i]; numdesc = na->num_tx_desc; bzero(kring, sizeof(*kring)); @@ -459,8 +456,9 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na) ofs += sizeof(struct netmap_ring) + numdesc * sizeof(struct netmap_slot); + } - /* Receive rings */ + for (i = 0; i < nrx; i++) { /* Receive rings */ kring = &na->rx_rings[i]; numdesc = na->num_rx_desc; bzero(kring, sizeof(*kring)); @@ -480,21 +478,21 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na) numdesc * sizeof(struct netmap_slot); } NMA_UNLOCK(); - for (i = 0; i < n+1; i++) { - // XXX initialize the selrecord structs. - } + // XXX initialize the selrecord structs. + final: /* * fill the slots for the rx and tx queues. They contain the offset * between the ring and nifp, so the information is usable in * userspace to reach the ring from the nifp. */ - for (i = 0; i < n; i++) { - char *base = (char *)nifp; + for (i = 0; i < ntx; i++) { *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = - (char *)na->tx_rings[i].ring - base; - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n] = - (char *)na->rx_rings[i].ring - base; + (char *)na->tx_rings[i].ring - (char *)nifp; + } + for (i = 0; i < nrx; i++) { + *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] = + (char *)na->rx_rings[i].ring - (char *)nifp; } return (nifp); } @@ -532,17 +530,17 @@ netmap_memory_init(void) ); if (buf) break; - } + } if (buf == NULL) return (ENOMEM); sz += extra_sz; - netmap_mem_d = malloc(sizeof(struct netmap_mem_d), M_NETMAP, + nm_mem = malloc(sizeof(struct netmap_mem_d), M_NETMAP, M_WAITOK | M_ZERO); - mtx_init(&netmap_mem_d->nm_mtx, "netmap memory allocator lock", NULL, + mtx_init(&nm_mem->nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF); - TAILQ_INIT(&netmap_mem_d->nm_molist); - netmap_mem_d->nm_buffer = buf; - netmap_mem_d->nm_totalsize = sz; + TAILQ_INIT(&nm_mem->nm_molist); + nm_mem->nm_buffer = buf; + nm_mem->nm_totalsize = sz; /* * A buffer takes 2k, a slot takes 8 bytes + ring overhead, @@ -550,24 +548,24 @@ netmap_memory_init(void) * the memory for the rings, and the rest for the buffers, * and be sure we never run out. 
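To make the sz/200 split described above concrete: with the default 64 MB arena, roughly 0.5% (about 328 KB, then rounded up to a page boundary) is set aside for rings, leaving room for about 32600 buffers of NETMAP_BUF_SIZE bytes each. The same arithmetic, done standalone (constants match the netmap defaults, the variable names are mine):

    #include <stdio.h>

    #define PAGE_SIZE       4096
    #define NETMAP_BUF_SIZE 2048

    int
    main(void)
    {
        size_t sz = 64UL << 20;         /* total arena: 64 MB */
        size_t ring_sz = sz / 200;      /* ~0.5% reserved for rings */
        size_t buf_start = (ring_sz + PAGE_SIZE - 1) &
            ~((size_t)PAGE_SIZE - 1);   /* round up to a page */
        size_t nbufs = (sz - buf_start) / NETMAP_BUF_SIZE;

        printf("rings: %zu KB, buffers: %zu x %d bytes\n",
            ring_sz >> 10, nbufs, NETMAP_BUF_SIZE);
        return (0);
    }
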
*/ - netmap_mem_d->nm_size = sz/200; - netmap_mem_d->nm_buf_start = - (netmap_mem_d->nm_size + PAGE_SIZE - 1) & ~(PAGE_SIZE-1); - netmap_mem_d->nm_buf_len = sz - netmap_mem_d->nm_buf_start; + nm_mem->nm_size = sz/200; + nm_mem->nm_buf_start = + (nm_mem->nm_size + PAGE_SIZE - 1) & ~(PAGE_SIZE-1); + nm_mem->nm_buf_len = sz - nm_mem->nm_buf_start; - nm_buf_pool.base = netmap_mem_d->nm_buffer; - nm_buf_pool.base += netmap_mem_d->nm_buf_start; + nm_buf_pool.base = nm_mem->nm_buffer; + nm_buf_pool.base += nm_mem->nm_buf_start; netmap_buffer_base = nm_buf_pool.base; D("netmap_buffer_base %p (offset %d)", - netmap_buffer_base, (int)netmap_mem_d->nm_buf_start); + netmap_buffer_base, (int)nm_mem->nm_buf_start); /* number of buffers, they all start as free */ netmap_total_buffers = nm_buf_pool.total_buffers = - netmap_mem_d->nm_buf_len / NETMAP_BUF_SIZE; + nm_mem->nm_buf_len / NETMAP_BUF_SIZE; nm_buf_pool.bufsize = NETMAP_BUF_SIZE; D("Have %d MB, use %dKB for rings, %d buffers at %p", - (sz >> 20), (int)(netmap_mem_d->nm_size >> 10), + (sz >> 20), (int)(nm_mem->nm_size >> 10), nm_buf_pool.total_buffers, nm_buf_pool.base); /* allocate and initialize the bitmap. Entry 0 is considered @@ -583,10 +581,10 @@ netmap_memory_init(void) mem_obj = malloc(sizeof(struct netmap_mem_obj), M_NETMAP, M_WAITOK | M_ZERO); - TAILQ_INSERT_HEAD(&netmap_mem_d->nm_molist, mem_obj, nmo_next); + TAILQ_INSERT_HEAD(&nm_mem->nm_molist, mem_obj, nmo_next); mem_obj->nmo_used = 0; - mem_obj->nmo_size = netmap_mem_d->nm_size; - mem_obj->nmo_data = netmap_mem_d->nm_buffer; + mem_obj->nmo_size = nm_mem->nm_size; + mem_obj->nmo_data = nm_mem->nm_buffer; return (0); } @@ -603,9 +601,9 @@ netmap_memory_fini(void) { struct netmap_mem_obj *mem_obj; - while (!TAILQ_EMPTY(&netmap_mem_d->nm_molist)) { - mem_obj = TAILQ_FIRST(&netmap_mem_d->nm_molist); - TAILQ_REMOVE(&netmap_mem_d->nm_molist, mem_obj, nmo_next); + while (!TAILQ_EMPTY(&nm_mem->nm_molist)) { + mem_obj = TAILQ_FIRST(&nm_mem->nm_molist); + TAILQ_REMOVE(&nm_mem->nm_molist, mem_obj, nmo_next); if (mem_obj->nmo_used == 1) { printf("netmap: leaked %d bytes at %p\n", (int)mem_obj->nmo_size, @@ -613,9 +611,9 @@ netmap_memory_fini(void) } free(mem_obj, M_NETMAP); } - contigfree(netmap_mem_d->nm_buffer, netmap_mem_d->nm_totalsize, M_NETMAP); + contigfree(nm_mem->nm_buffer, nm_mem->nm_totalsize, M_NETMAP); // XXX mutex_destroy(nm_mtx); - free(netmap_mem_d, M_NETMAP); + free(nm_mem, M_NETMAP); } /*------------- end of memory allocator -----------------*/ @@ -647,7 +645,7 @@ netmap_dtor_locked(void *data) na->refcount--; if (na->refcount <= 0) { /* last instance */ - u_int i; + u_int i, j, lim; D("deleting last netmap instance for %s", ifp->if_xname); /* @@ -669,24 +667,22 @@ netmap_dtor_locked(void *data) /* Wake up any sleeping threads. 
netmap_poll will * then return POLLERR */ - for (i = 0; i < na->num_queues + 2; i++) { + for (i = 0; i < na->num_tx_queues + 1; i++) selwakeuppri(&na->tx_rings[i].si, PI_NET); + for (i = 0; i < na->num_rx_queues + 1; i++) selwakeuppri(&na->rx_rings[i].si, PI_NET); - } + selwakeuppri(&na->tx_si, PI_NET); + selwakeuppri(&na->rx_si, PI_NET); /* release all buffers */ NMA_LOCK(); - for (i = 0; i < na->num_queues + 1; i++) { - int j, lim; - struct netmap_ring *ring; - - ND("tx queue %d", i); - ring = na->tx_rings[i].ring; + for (i = 0; i < na->num_tx_queues + 1; i++) { + struct netmap_ring *ring = na->tx_rings[i].ring; lim = na->tx_rings[i].nkr_num_slots; for (j = 0; j < lim; j++) netmap_free_buf(nifp, ring->slot[j].buf_idx); - - ND("rx queue %d", i); - ring = na->rx_rings[i].ring; + } + for (i = 0; i < na->num_rx_queues + 1; i++) { + struct netmap_ring *ring = na->rx_rings[i].ring; lim = na->rx_rings[i].nkr_num_slots; for (j = 0; j < lim; j++) netmap_free_buf(nifp, ring->slot[j].buf_idx); @@ -708,7 +704,7 @@ netmap_dtor(void *data) na->nm_lock(ifp, NETMAP_REG_LOCK, 0); netmap_dtor_locked(data); - na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0); + na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0); if_rele(ifp); bzero(priv, sizeof(*priv)); /* XXX for safety */ @@ -758,7 +754,7 @@ netmap_mmap(__unused struct cdev *dev, static void netmap_sync_to_host(struct netmap_adapter *na) { - struct netmap_kring *kring = &na->tx_rings[na->num_queues]; + struct netmap_kring *kring = &na->tx_rings[na->num_tx_queues]; struct netmap_ring *ring = kring->ring; struct mbuf *head = NULL, *tail = NULL, *m; u_int k, n, lim = kring->nkr_num_slots - 1; @@ -818,31 +814,37 @@ netmap_sync_to_host(struct netmap_adapter *na) static void netmap_sync_from_host(struct netmap_adapter *na, struct thread *td) { - struct netmap_kring *kring = &na->rx_rings[na->num_queues]; + struct netmap_kring *kring = &na->rx_rings[na->num_rx_queues]; struct netmap_ring *ring = kring->ring; - int error = 1, delta; - u_int k = ring->cur, lim = kring->nkr_num_slots; + u_int j, n, lim = kring->nkr_num_slots; + u_int k = ring->cur, resvd = ring->reserved; na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0); - if (k >= lim) /* bad value */ - goto done; - delta = k - kring->nr_hwcur; - if (delta < 0) - delta += lim; - kring->nr_hwavail -= delta; - if (kring->nr_hwavail < 0) /* error */ - goto done; + if (k >= lim) { + netmap_ring_reinit(kring); + return; + } + /* new packets are already set in nr_hwavail */ + /* skip past packets that userspace has released */ + j = kring->nr_hwcur; + if (resvd > 0) { + if (resvd + ring->avail >= lim + 1) { + D("XXX invalid reserve/avail %d %d", resvd, ring->avail); + ring->reserved = resvd = 0; // XXX panic... + } + k = (k >= resvd) ? k - resvd : k + lim - resvd; + } + if (j != k) { + n = k >= j ? k - j : k + lim - j; + kring->nr_hwavail -= n; kring->nr_hwcur = k; - error = 0; - k = ring->avail = kring->nr_hwavail; + } + k = ring->avail = kring->nr_hwavail - resvd; if (k == 0 && td) selrecord(td, &kring->si); if (k && (netmap_verbose & NM_VERB_HOST)) D("%d pkts from stack", k); -done: na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0); - if (error) - netmap_ring_reinit(kring); } @@ -907,13 +909,13 @@ netmap_ring_reinit(struct netmap_kring *kring) } if (errors) { int pos = kring - kring->na->tx_rings; - int n = kring->na->num_queues + 2; + int n = kring->na->num_tx_queues + 1; D("total %d errors", errors); errors++; D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d", kring->na->ifp->if_xname, - pos < n ? "TX" : "RX", pos < n ? pos : pos - n, + pos < n ? 
"TX" : "RX", pos < n ? pos : pos - n, ring->cur, kring->nr_hwcur, ring->avail, kring->nr_hwavail); ring->cur = kring->nr_hwcur; @@ -933,10 +935,13 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid) struct ifnet *ifp = priv->np_ifp; struct netmap_adapter *na = NA(ifp); u_int i = ringid & NETMAP_RING_MASK; - /* first time we don't lock */ + /* initially (np_qfirst == np_qlast) we don't want to lock */ int need_lock = (priv->np_qfirst != priv->np_qlast); + int lim = na->num_rx_queues; - if ( (ringid & NETMAP_HW_RING) && i >= na->num_queues) { + if (na->num_tx_queues > lim) + lim = na->num_tx_queues; + if ( (ringid & NETMAP_HW_RING) && i >= lim) { D("invalid ring id %d", i); return (EINVAL); } @@ -944,14 +949,14 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid) na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); priv->np_ringid = ringid; if (ringid & NETMAP_SW_RING) { - priv->np_qfirst = na->num_queues; - priv->np_qlast = na->num_queues + 1; + priv->np_qfirst = NETMAP_SW_RING; + priv->np_qlast = 0; } else if (ringid & NETMAP_HW_RING) { priv->np_qfirst = i; priv->np_qlast = i + 1; } else { priv->np_qfirst = 0; - priv->np_qlast = na->num_queues; + priv->np_qlast = NETMAP_HW_RING ; } priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1; if (need_lock) @@ -962,8 +967,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid) D("ringid %s set to HW RING %d", ifp->if_xname, priv->np_qfirst); else - D("ringid %s set to all %d HW RINGS", ifp->if_xname, - priv->np_qlast); + D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim); return 0; } @@ -989,7 +993,7 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, struct nmreq *nmr = (struct nmreq *) data; struct netmap_adapter *na; int error; - u_int i; + u_int i, lim; struct netmap_if *nifp; CURVNET_SET(TD_TO_VNET(td)); @@ -1004,22 +1008,36 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, switch (cmd) { case NIOCGINFO: /* return capabilities etc */ /* memsize is always valid */ - nmr->nr_memsize = netmap_mem_d->nm_totalsize; + nmr->nr_memsize = nm_mem->nm_totalsize; nmr->nr_offset = 0; - nmr->nr_numrings = 0; - nmr->nr_numslots = 0; + nmr->nr_rx_rings = nmr->nr_tx_rings = 0; + nmr->nr_rx_slots = nmr->nr_tx_slots = 0; + if (nmr->nr_version != NETMAP_API) { + D("API mismatch got %d have %d", + nmr->nr_version, NETMAP_API); + nmr->nr_version = NETMAP_API; + error = EINVAL; + break; + } if (nmr->nr_name[0] == '\0') /* just get memory info */ break; error = get_ifp(nmr->nr_name, &ifp); /* get a refcount */ if (error) break; na = NA(ifp); /* retrieve netmap_adapter */ - nmr->nr_numrings = na->num_queues; - nmr->nr_numslots = na->num_tx_desc; + nmr->nr_rx_rings = na->num_rx_queues; + nmr->nr_tx_rings = na->num_tx_queues; + nmr->nr_rx_slots = na->num_rx_desc; + nmr->nr_tx_slots = na->num_tx_desc; if_rele(ifp); /* return the refcount */ break; case NIOCREGIF: + if (nmr->nr_version != NETMAP_API) { + nmr->nr_version = NETMAP_API; + error = EINVAL; + break; + } if (priv != NULL) { /* thread already registered */ error = netmap_set_ringid(priv, nmr->nr_ringid); break; @@ -1095,9 +1113,11 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, } /* return the offset of the netmap_if object */ - nmr->nr_numrings = na->num_queues; - nmr->nr_numslots = na->num_tx_desc; - nmr->nr_memsize = netmap_mem_d->nm_totalsize; + nmr->nr_rx_rings = na->num_rx_queues; + nmr->nr_tx_rings = na->num_tx_queues; + nmr->nr_rx_slots = na->num_rx_desc; + nmr->nr_tx_slots = na->num_tx_desc; + nmr->nr_memsize = 
nm_mem->nm_totalsize; nmr->nr_offset = netmap_if_offset(nifp); break; @@ -1120,17 +1140,19 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, } ifp = priv->np_ifp; /* we have a reference */ na = NA(ifp); /* retrieve netmap adapter */ - - if (priv->np_qfirst == na->num_queues) { - /* queues to/from host */ + if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */ if (cmd == NIOCTXSYNC) netmap_sync_to_host(na); else netmap_sync_from_host(na, NULL); break; } + /* find the last ring to scan */ + lim = priv->np_qlast; + if (lim == NETMAP_HW_RING) + lim = (cmd == NIOCTXSYNC) ? na->num_tx_queues : na->num_rx_queues; - for (i = priv->np_qfirst; i < priv->np_qlast; i++) { + for (i = priv->np_qfirst; i < lim; i++) { if (cmd == NIOCTXSYNC) { struct netmap_kring *kring = &na->tx_rings[i]; if (netmap_verbose & NM_VERB_TXSYNC) @@ -1195,6 +1217,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) struct ifnet *ifp; struct netmap_kring *kring; u_int core_lock, i, check_all, want_tx, want_rx, revents = 0; + u_int lim_tx, lim_rx; enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */ if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL) @@ -1212,17 +1235,18 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) na = NA(ifp); /* retrieve netmap adapter */ + lim_tx = na->num_tx_queues; + lim_rx = na->num_rx_queues; /* how many queues we are scanning */ - i = priv->np_qfirst; - if (i == na->num_queues) { /* from/to host */ + if (priv->np_qfirst == NETMAP_SW_RING) { if (priv->np_txpoll || want_tx) { /* push any packets up, then we are always ready */ - kring = &na->tx_rings[i]; + kring = &na->tx_rings[lim_tx]; netmap_sync_to_host(na); revents |= want_tx; } if (want_rx) { - kring = &na->rx_rings[i]; + kring = &na->rx_rings[lim_rx]; if (kring->ring->avail == 0) netmap_sync_from_host(na, td); if (kring->ring->avail > 0) { @@ -1253,7 +1277,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) * there are pending packets to send. The latter can be disabled * passing NETMAP_NO_TX_POLL in the NIOCREG call. */ - check_all = (i + 1 != priv->np_qlast); + check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1); /* * core_lock indicates what to do with the core lock. @@ -1270,25 +1294,29 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) * LOCKED_CL core lock is set, so we need to release it. */ core_lock = (check_all || !na->separate_locks) ? NEED_CL : NO_CL; + if (priv->np_qlast != NETMAP_HW_RING) { + lim_tx = lim_rx = priv->np_qlast; + } + /* * We start with a lock free round which is good if we have * data available. If this fails, then lock and call the sync * routines. 
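With the version checks added above, both NIOCGINFO and NIOCREGIF now fail with EINVAL unless userspace stores NETMAP_API in nr_version before the call. A minimal sketch of the userspace side (error handling trimmed; open_checked() is a name made up for this example, not a netmap function):

    #include <sys/ioctl.h>
    #include <net/netmap.h>
    #include <net/netmap_user.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <strings.h>
    #include <unistd.h>

    static int
    open_checked(const char *ifname)
    {
        struct nmreq req;
        int fd;

        fd = open("/dev/netmap", O_RDWR);
        if (fd < 0)
            return (-1);
        bzero(&req, sizeof(req));
        strncpy(req.nr_name, ifname, sizeof(req.nr_name));
        req.nr_version = NETMAP_API;   /* mandatory since API version 2 */
        if (ioctl(fd, NIOCGINFO, &req) == -1) {
            perror("NIOCGINFO (binary/kernel NETMAP_API mismatch?)");
            close(fd);
            return (-1);
        }
        printf("%s: %u tx rings x %u slots, %u rx rings x %u slots\n",
            ifname, req.nr_tx_rings, req.nr_tx_slots,
            req.nr_rx_rings, req.nr_rx_slots);
        return (fd);
    }
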
*/ - for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) { - kring = &na->rx_rings[i]; - if (kring->ring->avail > 0) { - revents |= want_rx; - want_rx = 0; /* also breaks the loop */ - } + for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) { + kring = &na->rx_rings[i]; + if (kring->ring->avail > 0) { + revents |= want_rx; + want_rx = 0; /* also breaks the loop */ } - for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) { - kring = &na->tx_rings[i]; - if (kring->ring->avail > 0) { - revents |= want_tx; - want_tx = 0; /* also breaks the loop */ - } + } + for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) { + kring = &na->tx_rings[i]; + if (kring->ring->avail > 0) { + revents |= want_tx; + want_tx = 0; /* also breaks the loop */ } + } /* * If we to push packets out (priv->np_txpoll) or want_tx is @@ -1296,7 +1324,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) * to avoid that the tx rings stall). */ if (priv->np_txpoll || want_tx) { - for (i = priv->np_qfirst; i < priv->np_qlast; i++) { + for (i = priv->np_qfirst; i < lim_tx; i++) { kring = &na->tx_rings[i]; /* * Skip the current ring if want_tx == 0 @@ -1340,7 +1368,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) * Do it on all rings because otherwise we starve. */ if (want_rx) { - for (i = priv->np_qfirst; i < priv->np_qlast; i++) { + for (i = priv->np_qfirst; i < lim_rx; i++) { kring = &na->rx_rings[i]; if (core_lock == NEED_CL) { na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); @@ -1364,12 +1392,11 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) na->nm_lock(ifp, NETMAP_RX_UNLOCK, i); } } - if (check_all && revents == 0) { - i = na->num_queues + 1; /* the global queue */ + if (check_all && revents == 0) { /* signal on the global queue */ if (want_tx) - selrecord(td, &na->tx_rings[i].si); + selrecord(td, &na->tx_si); if (want_rx) - selrecord(td, &na->rx_rings[i].si); + selrecord(td, &na->rx_si); } if (core_lock == LOCKED_CL) na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0); @@ -1430,28 +1457,37 @@ netmap_lock_wrapper(struct ifnet *dev, int what, u_int queueid) * kring N is for the host stack queue * kring N+1 is only used for the selinfo for all queues. * Return 0 on success, ENOMEM otherwise. + * + * na->num_tx_queues can be set for cards with different tx/rx setups */ int netmap_attach(struct netmap_adapter *na, int num_queues) { - int n = num_queues + 2; - int size = sizeof(*na) + 2 * n * sizeof(struct netmap_kring); + int i, n, size; void *buf; struct ifnet *ifp = na->ifp; - int i; if (ifp == NULL) { D("ifp not set, giving up"); return EINVAL; } + /* clear other fields ? */ na->refcount = 0; - na->num_queues = num_queues; + if (na->num_tx_queues == 0) + na->num_tx_queues = num_queues; + na->num_rx_queues = num_queues; + /* on each direction we have N+1 resources + * 0..n-1 are the hardware rings + * n is the ring attached to the stack. 
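The sizing logic above packs the adapter and both kring arrays into a single allocation: (num_tx_queues + 1) tx krings followed by (num_rx_queues + 1) rx krings, the extra entry in each direction serving the host stack ring. The pointer arithmetic, redone standalone with toy types (struct kring and struct adapter here are stand-ins, not the netmap structures):

    #include <stdlib.h>

    struct kring { char pad[64]; };   /* stand-in for struct netmap_kring */
    struct adapter {                  /* stand-in for struct netmap_adapter */
        int ntx, nrx;
        struct kring *tx_rings, *rx_rings;
    };

    static struct adapter *
    attach_layout(int ntx, int nrx)
    {
        int n = ntx + nrx + 2;        /* one extra kring per direction */
        char *buf = calloc(1, sizeof(struct adapter) +
            n * sizeof(struct kring));
        struct adapter *a = (struct adapter *)buf;

        if (a == NULL)
            return (NULL);
        a->ntx = ntx;
        a->nrx = nrx;
        a->tx_rings = (struct kring *)(buf + sizeof(*a));
        a->rx_rings = a->tx_rings + ntx + 1; /* past hw tx + host tx */
        return (a);
    }
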
+ */ + n = na->num_rx_queues + na->num_tx_queues + 2; + size = sizeof(*na) + n * sizeof(struct netmap_kring); buf = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (buf) { WNA(ifp) = buf; na->tx_rings = (void *)((char *)buf + sizeof(*na)); - na->rx_rings = na->tx_rings + n; + na->rx_rings = na->tx_rings + na->num_tx_queues + 1; na->buff_size = NETMAP_BUF_SIZE; bcopy(na, buf, sizeof(*na)); ifp->if_capabilities |= IFCAP_NETMAP; @@ -1460,11 +1496,17 @@ netmap_attach(struct netmap_adapter *na, int num_queues) if (na->nm_lock == NULL) na->nm_lock = netmap_lock_wrapper; mtx_init(&na->core_lock, "netmap core lock", NULL, MTX_DEF); - for (i = 0 ; i < num_queues; i++) + for (i = 0 ; i < na->num_tx_queues + 1; i++) mtx_init(&na->tx_rings[i].q_lock, "netmap txq lock", NULL, MTX_DEF); - for (i = 0 ; i < num_queues; i++) + for (i = 0 ; i < na->num_rx_queues + 1; i++) mtx_init(&na->rx_rings[i].q_lock, "netmap rxq lock", NULL, MTX_DEF); } +#ifdef linux + D("netdev_ops %p", ifp->netdev_ops); + /* prepare a clone of the netdev ops */ + na->nm_ndo = *ifp->netdev_ops; + na->nm_ndo.ndo_start_xmit = netmap_start_linux; +#endif D("%s for %s", buf ? "ok" : "failed", ifp->if_xname); return (buf ? 0 : ENOMEM); @@ -1484,10 +1526,16 @@ netmap_detach(struct ifnet *ifp) if (!na) return; - for (i = 0; i < na->num_queues + 2; i++) { + for (i = 0; i < na->num_tx_queues + 1; i++) { knlist_destroy(&na->tx_rings[i].si.si_note); - knlist_destroy(&na->rx_rings[i].si.si_note); + mtx_destroy(&na->tx_rings[i].q_lock); } + for (i = 0; i < na->num_rx_queues + 1; i++) { + knlist_destroy(&na->rx_rings[i].si.si_note); + mtx_destroy(&na->rx_rings[i].q_lock); + } + knlist_destroy(&na->tx_si.si_note); + knlist_destroy(&na->rx_si.si_note); bzero(na, sizeof(*na)); WNA(ifp) = NULL; free(na, M_DEVBUF); @@ -1503,7 +1551,7 @@ int netmap_start(struct ifnet *ifp, struct mbuf *m) { struct netmap_adapter *na = NA(ifp); - struct netmap_kring *kring = &na->rx_rings[na->num_queues]; + struct netmap_kring *kring = &na->rx_rings[na->num_rx_queues]; u_int i, len = MBUF_LEN(m); int error = EBUSY, lim = kring->nkr_num_slots - 1; struct netmap_slot *slot; @@ -1516,8 +1564,8 @@ netmap_start(struct ifnet *ifp, struct mbuf *m) D("stack ring %s full\n", ifp->if_xname); goto done; /* no space */ } - if (len > na->buff_size) { - D("drop packet size %d > %d", len, na->buff_size); + if (len > NETMAP_BUF_SIZE) { + D("drop packet size %d > %d", len, NETMAP_BUF_SIZE); goto done; /* too long for us */ } @@ -1530,7 +1578,7 @@ netmap_start(struct ifnet *ifp, struct mbuf *m) slot->len = len; kring->nr_hwavail++; if (netmap_verbose & NM_VERB_HOST) - D("wake up host ring %s %d", na->ifp->if_xname, na->num_queues); + D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_queues); selwakeuppri(&kring->si, PI_NET); error = 0; done: @@ -1556,21 +1604,21 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n, u_int new_cur) { struct netmap_kring *kring; - struct netmap_ring *ring; int new_hwofs, lim; if (na == NULL) return NULL; /* no netmap support here */ if (!(na->ifp->if_capenable & IFCAP_NETMAP)) return NULL; /* nothing to reinitialize */ - kring = tx == NR_TX ? 
na->tx_rings + n : na->rx_rings + n;
-	ring = kring->ring;
-	lim = kring->nkr_num_slots - 1;
-	if (tx == NR_TX)
+	if (tx == NR_TX) {
+		kring = na->tx_rings + n;
 		new_hwofs = kring->nr_hwcur - new_cur;
-	else
+	} else {
+		kring = na->rx_rings + n;
 		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
+	}
+	lim = kring->nkr_num_slots - 1;
 	if (new_hwofs > lim)
 		new_hwofs -= lim + 1;
@@ -1583,11 +1631,12 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
 		tx == NR_TX ? "TX" : "RX", n);
 
 	/*
+	 * Wakeup on the individual and global lock
 	 * We do the wakeup here, but the ring is not yet reconfigured.
 	 * However, we are under lock so there are no races.
 	 */
 	selwakeuppri(&kring->si, PI_NET);
-	selwakeuppri(&kring[na->num_queues + 1 - n].si, PI_NET);
+	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
 	return kring->ring->slot;
 }
 
@@ -1603,38 +1652,48 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
 *   lock(i); wake(i); unlock(i)
 * N rings, separate locks:
 *   lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core)
+ * work_done is non-null on the RX path.
 */
int
netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
{
	struct netmap_adapter *na;
	struct netmap_kring *r;
+	NM_SELINFO_T *main_wq;

	if (!(ifp->if_capenable & IFCAP_NETMAP))
		return 0;
	na = NA(ifp);
-	r = work_done ? na->rx_rings : na->tx_rings;
+	if (work_done) { /* RX path */
+		r = na->rx_rings + q;
+		r->nr_kflags |= NKR_PENDINTR;
+		main_wq = (na->num_rx_queues > 1) ? &na->rx_si : NULL;
+	} else { /* TX path */
+		r = na->tx_rings + q;
+		main_wq = (na->num_tx_queues > 1) ? &na->tx_si : NULL;
+		work_done = &q; /* dummy */
+	}
	if (na->separate_locks) {
-		mtx_lock(&r[q].q_lock);
-		selwakeuppri(&r[q].si, PI_NET);
-		mtx_unlock(&r[q].q_lock);
-		if (na->num_queues > 1) {
+		mtx_lock(&r->q_lock);
+		selwakeuppri(&r->si, PI_NET);
+		mtx_unlock(&r->q_lock);
+		if (main_wq) {
			mtx_lock(&na->core_lock);
-			selwakeuppri(&r[na->num_queues + 1].si, PI_NET);
+			selwakeuppri(main_wq, PI_NET);
			mtx_unlock(&na->core_lock);
		}
	} else {
		mtx_lock(&na->core_lock);
-		selwakeuppri(&r[q].si, PI_NET);
-		if (na->num_queues > 1)
-			selwakeuppri(&r[na->num_queues + 1].si, PI_NET);
+		selwakeuppri(&r->si, PI_NET);
+		if (main_wq)
+			selwakeuppri(main_wq, PI_NET);
		mtx_unlock(&na->core_lock);
	}
-	if (work_done)
	*work_done = 1; /* do not fire napi again */
	return 1;
}
+
static struct cdevsw netmap_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "netmap",
@@ -1666,7 +1725,7 @@ netmap_init(void)
		return (error);
	}
	printf("netmap: loaded module with %d Mbytes\n",
-	    (int)(netmap_mem_d->nm_totalsize >> 20));
+	    (int)(nm_mem->nm_totalsize >> 20));
	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
			      "netmap");
	return (error);
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 6e0357047773..7660d14fb0a9 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -25,7 +25,7 @@
 /*
 * $FreeBSD$
- * $Id: netmap_kern.h 9795 2011-12-02 11:39:08Z luigi $
+ * $Id: netmap_kern.h 10602 2012-02-21 16:47:55Z luigi $
 *
 * The header contains the definitions of constants and function
 * prototypes used only in kernelspace.
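netmap_rx_irq() above centralizes the wakeup policy, so a driver interrupt handler only has to hand off and bail out when the interface is in netmap mode. A sketch of how an RX path might use it; foo_rxeof() and its softc are hypothetical, and most of the drivers touched by this patch still wake the queues by hand:

    static void
    foo_rxeof(struct foo_softc *sc, int qid)    /* hypothetical driver */
    {
        int work_done = 0;

        /* In netmap mode the helper flags the kring with
         * NKR_PENDINTR, wakes the per-ring waiters and, on
         * multiqueue adapters, the global wait queue, then
         * returns nonzero so we skip the mbuf path entirely.
         */
        if (netmap_rx_irq(sc->sc_ifp, qid, &work_done))
            return;

        /* ... normal mbuf receive processing ... */
    }
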
@@ -39,7 +39,7 @@ #define NM_SELINFO_T struct selinfo #define MBUF_LEN(m) ((m)->m_pkthdr.len) #define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) -#elif defined (__linux__) +#elif defined (linux) #define NM_LOCK_T spinlock_t #define NM_SELINFO_T wait_queue_head_t #define MBUF_LEN(m) ((m)->len) @@ -65,7 +65,14 @@ MALLOC_DECLARE(M_NETMAP); struct netmap_adapter; /* - * private, kernel view of a ring. + * private, kernel view of a ring. Keeps track of the status of + * a ring across system calls. + * + * nr_hwcur index of the next buffer to refill. + * It corresponds to ring->cur - ring->reserved + * + * nr_hwavail the number of slots "owned" by userspace. + * nr_hwavail =:= ring->avail + ring->reserved * * The indexes in the NIC and netmap rings are offset by nkr_hwofs slots. * This is so that, on a reset, buffers owned by userspace are not @@ -101,13 +108,14 @@ struct netmap_adapter { int separate_locks; /* set if the interface suports different locks for rx, tx and core. */ - u_int num_queues; /* number of tx/rx queue pairs: this is + u_int num_rx_queues; /* number of tx/rx queue pairs: this is a duplicate field needed to simplify the signature of ``netmap_detach``. */ + u_int num_tx_queues; // if nonzero, overrides num_queues XXX u_int num_tx_desc; /* number of descriptor in each queue */ u_int num_rx_desc; - u_int buff_size; + u_int buff_size; // XXX deprecate, use NETMAP_BUF_SIZE //u_int flags; // XXX unused /* tx_rings and rx_rings are private but allocated @@ -117,6 +125,8 @@ struct netmap_adapter { struct netmap_kring *tx_rings; /* array of TX rings. */ struct netmap_kring *rx_rings; /* array of RX rings. */ + NM_SELINFO_T tx_si, rx_si; /* global wait queues */ + /* copy of if_qflush and if_transmit pointers, to intercept * packets from the network stack when netmap is active. * XXX probably if_qflush is not necessary. 
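The two invariants stated in the comment above can be checked on a concrete state, here a 256-slot RX ring where the application sits at cur = 10, holds back reserved = 3 buffers, and still has avail = 5 packets to consume (standalone, with plain integers standing in for the ring fields):

    #include <assert.h>

    int
    main(void)
    {
        unsigned num_slots = 256, cur = 10, reserved = 3, avail = 5;
        /* nr_hwcur corresponds to ring->cur - ring->reserved */
        unsigned nr_hwcur = (cur + num_slots - reserved) % num_slots;
        /* nr_hwavail corresponds to ring->avail + ring->reserved */
        unsigned nr_hwavail = avail + reserved;

        assert(nr_hwcur == 7);
        assert(nr_hwavail == 8);
        return (0);
    }
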
@@ -135,6 +145,9 @@ struct netmap_adapter { void (*nm_lock)(struct ifnet *, int what, u_int ringid); int (*nm_txsync)(struct ifnet *, u_int ring, int lock); int (*nm_rxsync)(struct ifnet *, u_int ring, int lock); +#ifdef linux + struct net_device_ops nm_ndo; +#endif /* linux */ }; /* @@ -254,55 +267,33 @@ netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf) * functions to map NIC to KRING indexes (n2k) and vice versa (k2n) */ static inline int -netmap_ridx_n2k(struct netmap_adapter *na, int ring, int nic_idx) +netmap_idx_n2k(struct netmap_kring *kr, int idx) { - int kring_idx = nic_idx + na->rx_rings[ring].nkr_hwofs; - if (kring_idx < 0) - return kring_idx + na->num_rx_desc; - else if (kring_idx < na->num_rx_desc) - return kring_idx; + int n = kr->nkr_num_slots; + idx += kr->nkr_hwofs; + if (idx < 0) + return idx + n; + else if (idx < n) + return idx; else - return kring_idx - na->num_rx_desc; -} - -static inline int -netmap_tidx_n2k(struct netmap_adapter *na, int ring, int nic_idx) -{ - int kring_idx = nic_idx + na->tx_rings[ring].nkr_hwofs; - if (kring_idx < 0) - return kring_idx + na->num_tx_desc; - else if (kring_idx < na->num_tx_desc) - return kring_idx; - else - return kring_idx - na->num_tx_desc; + return idx - n; } static inline int -netmap_ridx_k2n(struct netmap_adapter *na, int ring, int kring_idx) +netmap_idx_k2n(struct netmap_kring *kr, int idx) { - int nic_idx = kring_idx - na->rx_rings[ring].nkr_hwofs; - if (nic_idx < 0) - return nic_idx + na->num_rx_desc; - else if (nic_idx < na->num_rx_desc) - return nic_idx; + int n = kr->nkr_num_slots; + idx -= kr->nkr_hwofs; + if (idx < 0) + return idx + n; + else if (idx < n) + return idx; else - return nic_idx - na->num_rx_desc; + return idx - n; } -static inline int -netmap_tidx_k2n(struct netmap_adapter *na, int ring, int kring_idx) -{ - int nic_idx = kring_idx - na->tx_rings[ring].nkr_hwofs; - if (nic_idx < 0) - return nic_idx + na->num_tx_desc; - else if (nic_idx < na->num_tx_desc) - return nic_idx; - else - return nic_idx - na->num_tx_desc; -} - /* * NMB return the virtual address of a buffer (buffer 0 on bad index) * PNMB also fills the physical address diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index f73389ef1e7e..d11661cbe318 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -2111,6 +2111,7 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp) ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (ifp->if_capenable & IFCAP_NETMAP) { + NA(ifp)->rx_rings->nr_kflags |= NKR_PENDINTR; selwakeuppri(&NA(ifp)->rx_rings->si, PI_NET); return 0; } diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 0ba1537b4e1a..888c15b62164 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -32,11 +32,13 @@ /* * $FreeBSD$ - * $Id: netmap.h 9753 2011-11-28 15:10:43Z luigi $ + * $Id: netmap.h 10601 2012-02-21 16:40:14Z luigi $ * - * This header contains the definitions of the constants and the - * structures needed by the ``netmap'' module, both kernel and - * userspace. + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ */ #ifndef _NET_NETMAP_H_ @@ -48,14 +50,8 @@ * The data structures used by netmap are shown below. Those in * capital letters are in an mmapp()ed area shared with userspace, * while others are private to the kernel. 
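The two unified helpers replace the four per-direction copies; because the offset stays within one ring size, a single conditional wrap suffices on either side. A standalone round-trip check (the 256-slot size and offset 20 are arbitrary):

    #include <assert.h>

    /* Copies of the new netmap_idx_n2k()/netmap_idx_k2n() logic,
     * taking the slot count and offset as plain arguments.
     */
    static int
    idx_n2k(int n, int hwofs, int idx)
    {
        idx += hwofs;
        if (idx < 0)
            return (idx + n);
        else if (idx < n)
            return (idx);
        else
            return (idx - n);
    }

    static int
    idx_k2n(int n, int hwofs, int idx)
    {
        idx -= hwofs;
        if (idx < 0)
            return (idx + n);
        else if (idx < n)
            return (idx);
        else
            return (idx - n);
    }

    int
    main(void)
    {
        int n = 256, hwofs = 20, i;

        assert(idx_n2k(n, hwofs, 250) == 14);   /* wraps past the end */
        for (i = 0; i < n; i++)                 /* k2n undoes n2k */
            assert(idx_k2n(n, hwofs, idx_n2k(n, hwofs, i)) == i);
        return (0);
    }
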
- * Shared structures do not contain pointers but only relative + * Shared structures do not contain pointers but only memory * offsets, so that addressing is portable between kernel and userspace. - * - * The 'softc' of each interface is extended with a struct netmap_adapter - * containing information to support netmap operation. In addition to - * the fixed fields, it has two pointers to reach the arrays of - * 'struct netmap_kring' which in turn reaches the various - * struct netmap_ring, shared with userspace. softc @@ -67,19 +63,22 @@ +----------------+<------+ |(netmap_adapter)| | | netmap_kring -| tx_rings *--------------------------------->+-------------+ -| | netmap_kring | ring *---------> ... -| rx_rings *---------->+--------------+ | nr_hwcur | -+----------------+ | ring *-------+ | nr_hwavail | - | nr_hwcur | | | selinfo | - | nr_hwavail | | +-------------+ - | selinfo | | | ... | - +--------------+ | (na_num_rings+1 entries) - | .... | | | | - (na_num_rings+1 entries) +-------------+ - | | | - +--------------+ | - | NETMAP_RING +| tx_rings *--------------------------------->+---------------+ +| | netmap_kring | ring *---------. +| rx_rings *--------->+---------------+ | nr_hwcur | | ++----------------+ | ring *--------. | nr_hwavail | V + | nr_hwcur | | | selinfo | | + | nr_hwavail | | +---------------+ . + | selinfo | | | ... | . + +---------------+ | |(ntx+1 entries)| + | .... | | | | + |(nrx+1 entries)| | +---------------+ + | | | + KERNEL +---------------+ | + | + ==================================================================== + | + USERSPACE | NETMAP_RING +---->+-------------+ / | cur | NETMAP_IF (nifp, one per file desc.) / | avail | @@ -100,16 +99,23 @@ | txring_ofs[n] | +---------------+ - * The NETMAP_RING is the shadow ring that mirrors the NIC rings. + * The private descriptor ('softc' or 'adapter') of each interface + * is extended with a "struct netmap_adapter" containing netmap-related + * info (see description in dev/netmap/netmap_kernel.h. + * Among other things, tx_rings and rx_rings point to the arrays of + * "struct netmap_kring" which in turn reache the various + * "struct netmap_ring", shared with userspace. + + * The NETMAP_RING is the userspace-visible replica of the NIC ring. * Each slot has the index of a buffer, its length and some flags. * In user space, the buffer address is computed as - * (char *)ring + buf_ofs + index*MAX_BUF_SIZE + * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE * In the kernel, buffers do not necessarily need to be contiguous, * and the virtual and physical addresses are derived through - * a lookup table. When userspace wants to use a different buffer - * in a location, it must set the NS_BUF_CHANGED flag to make - * sure that the kernel recomputes updates the hardware ring and - * other fields (bus_dmamap, etc.) as needed. + * a lookup table. + * To associate a different buffer to a slot, applications must + * write the new index in buf_idx, and set NS_BUF_CHANGED flag to + * make sure that the kernel updates the hardware ring as needed. * * Normally the driver is not requested to report the result of * transmissions (this can dramatically speed up operation). @@ -133,13 +139,16 @@ struct netmap_slot { * * In TX rings: * avail indicates the number of slots available for transmission. - * It is decremented by the application when it appends a - * packet, and set to nr_hwavail (see below) on a - * NIOCTXSYNC to reflect the actual state of the queue - * (keeping track of completed transmissions). 
- *	cur	indicates the empty slot to use for the next packet
+ *		It is updated by the kernel after every netmap system call.
+ *		It MUST BE decremented by the application when it appends a
+ *		packet.
+ *	cur	indicates the slot to use for the next packet
  *		to send (i.e. the "tail" of the queue).
- *		It is incremented by the application.
+ *		It MUST BE incremented by the application before
+ *		netmap system calls to reflect the number of newly
+ *		sent packets.
+ *		It is checked by the kernel on netmap system calls
+ *		(normally unmodified by the kernel unless invalid).
 *
 * The kernel side of netmap uses two additional fields in its own
 * private ring structure, netmap_kring:
@@ -153,12 +162,17 @@ struct netmap_slot {
 *
 * In RX rings:
 *	avail	is the number of packets available (possibly 0).
- *		It is decremented by the software when it consumes
- *		a packet, and set to nr_hwavail on a NIOCRXSYNC
- *	cur	indicates the first slot that contains a packet
- *		(the "head" of the queue).
- *		It is incremented by the software when it consumes
+ *		It MUST BE decremented by the application when it consumes
+ *		a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
+ *	cur	indicates the first slot that contains a packet not
+ *		processed yet (the "head" of the queue).
+ *		It MUST BE incremented by the software when it consumes
 *		a packet.
+ *	reserved	indicates the number of buffers before 'cur'
+ *		that the application has still in use. Normally 0,
+ *		it MUST BE incremented by the application when it
+ *		does not return the buffer immediately, and decremented
+ *		when the buffer is finally freed.
 *
 * The kernel side of netmap uses two additional fields in the kring:
 *	nr_hwcur	is a copy of nr_cur on an NIOCRXSYNC
@@ -182,7 +196,8 @@ struct netmap_ring {
 	const ssize_t	buf_ofs;
 	const uint32_t	num_slots;	/* number of slots in the ring. */
 	uint32_t	avail;		/* number of usable slots */
-	uint32_t	cur;		/* 'current' r/w position */
+	uint32_t	cur;		/* 'current' r/w position */
+	uint32_t	reserved;	/* not refilled before current */
 
 	const uint16_t	nr_buf_size;
 	uint16_t	flags;
@@ -191,7 +206,7 @@ struct netmap_ring {
 	struct timeval	ts;		/* time of last *sync() */
 
 	/* the slots follow. This struct has variable size */
-	struct netmap_slot slot[0];	/* array of slots. */
+	struct netmap_slot slot[0];  /* array of slots. */
};
@@ -204,24 +219,23 @@ struct netmap_ring {
 * nmr_queueid passed on the ioctl.
 */
struct netmap_if {
-	char	ni_name[IFNAMSIZ];	/* name of the interface. */
-	const u_int	ni_version;	/* API version, currently unused */
-	const u_int	ni_num_queues;	/* number of queue pairs (TX/RX). */
-	const u_int	ni_rx_queues;	/* if zero, use ni_num_queues */
+	char		ni_name[IFNAMSIZ]; /* name of the interface. */
+	const u_int	ni_version;	/* API version, currently unused */
+	const u_int	ni_rx_queues;	/* number of rx queues */
+	const u_int	ni_tx_queues;	/* if zero, same as ni_rx_queues */
 	/*
-	 * the following array contains the offset of the
-	 * each netmap ring from this structure. The first num_queues+1
-	 * refer to the tx rings, the next n+1 refer to the rx rings.
+	 * The following array contains the offset of each netmap ring
+	 * from this structure. The first ni_tx_queues+1 entries refer
+	 * to the tx rings, the next ni_rx_queues+1 refer to the rx rings
+	 * (the last entry in each block refers to the host stack rings).
 	 * The area is filled up by the kernel on NIOCREG,
 	 * and then only read by userspace code.
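Taken together, the rules above yield an RX consumer of the following shape when the application wants to hold on to buffers instead of returning them slot by slot (a fragment, not a complete program; it assumes <net/netmap.h> and <net/netmap_user.h> are included, nifp is an open netmap descriptor, and consume() is hypothetical):

    struct netmap_ring *ring = NETMAP_RXRING(nifp, 0);

    while (ring->avail > 0) {
        struct netmap_slot *slot = &ring->slot[ring->cur];

        consume(NETMAP_BUF(ring, slot->buf_idx), slot->len);
        ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
        ring->avail--;
        ring->reserved++;   /* buffer still in use, do not refill yet */
    }
    /* ... once the application has finished with the buffers ... */
    ring->reserved = 0;     /* release them all on the next sync */
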
- * entries 0..ni_num_queues-1 indicate the hardware queues,
- * entry ni_num_queues is the queue from/to the stack.
	 */
	const ssize_t	ring_ofs[0];
};

-#ifndef IFCAP_NETMAP	/* this should go in net/if.h */
-#define IFCAP_NETMAP	0x100000
+#ifndef IFCAP_NETMAP
+#define IFCAP_NETMAP	0x100000	/* used on linux */
 #endif

 #ifndef NIOCREGIF
@@ -246,18 +260,29 @@ struct netmap_if {
 */
struct nmreq {
	char		nr_name[IFNAMSIZ];
-	uint32_t	nr_version;	/* API version (unused) */
+	uint32_t	nr_version;	/* API version */
+#define	NETMAP_API	2		/* current version */
	uint32_t	nr_offset;	/* nifp offset in the shared region */
	uint32_t	nr_memsize;	/* size of the shared region */
-	uint32_t	nr_numslots;	/* descriptors per queue */
-	uint16_t	nr_numrings;
+	uint32_t	nr_tx_slots;	/* slots in tx rings */
+	uint32_t	nr_rx_slots;	/* slots in rx rings */
+	uint16_t	nr_tx_rings;	/* number of tx rings */
+	uint16_t	nr_rx_rings;	/* number of rx rings */
	uint16_t	nr_ringid;	/* ring(s) we care about */
#define NETMAP_HW_RING	0x4000	/* low bits indicate one hw ring */
-#define NETMAP_SW_RING	0x2000	/* we process the sw ring */
-#define NETMAP_NO_TX_POLL	0x1000	/* no gratuitous txsync on poll */
+#define NETMAP_SW_RING	0x2000	/* process the sw ring */
+#define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */
#define NETMAP_RING_MASK 0xfff	/* the ring number */
+	uint16_t	spare1;
+	uint32_t	spare2[4];
};
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions.
+ */
#define NIOCGINFO	_IOWR('i', 145, struct nmreq) /* return IF info */
#define NIOCREGIF	_IOWR('i', 146, struct nmreq) /* interface register */
#define NIOCUNREGIF	_IO('i', 147) /* interface unregister */
diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
index 64490452fe0c..be66e7a14bee 100644
--- a/sys/net/netmap_user.h
+++ b/sys/net/netmap_user.h
@@ -32,14 +32,13 @@
 /*
 * $FreeBSD$
- * $Id: netmap_user.h 9495 2011-10-18 15:28:23Z luigi $
+ * $Id: netmap_user.h 10597 2012-02-21 05:08:32Z luigi $
 *
 * This header contains the macros used to manipulate netmap structures
 * and packets in userspace. See netmap(4) for more information.
 *
- * The address of the struct netmap_if, say nifp, is determined
- * by the value returned from ioctl(.., NIOCREG, ...) and the mmap
- * region:
+ * The address of the struct netmap_if, say nifp, is computed from the
+ * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
 *	ioctl(fd, NIOCREG, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_nifp);
@@ -71,21 +70,20 @@

#define NETMAP_RXRING(nifp, index) \
	((struct netmap_ring *)((char *)(nifp) + \
-	 (nifp)->ring_ofs[index + (nifp)->ni_num_queues+1] ) )
+	 (nifp)->ring_ofs[index + (nifp)->ni_tx_queues+1] ) )

#define NETMAP_BUF(ring, index) \
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

+#define NETMAP_BUF_IDX(ring, buf) \
+	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+		(ring)->nr_buf_size )
+
#define NETMAP_RING_NEXT(r, i) \
	((i)+1 == (r)->num_slots ? 0 : (i) + 1 )

/*
 * Return 1 if the given tx ring is empty.
- *
- * @r	netmap_ring descriptor pointer.
- * Special case, a negative value in hwavail indicates that the
- * transmit queue is idle.
- * XXX revise */ #define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1) diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c index 2b9122c7e72c..1b027d7a45fe 100644 --- a/tools/tools/netmap/bridge.c +++ b/tools/tools/netmap/bridge.c @@ -48,7 +48,7 @@ int verbose = 0; } while (0) -char *version = "$Id: bridge.c 9642 2011-11-07 21:39:47Z luigi $"; +char *version = "$Id: bridge.c 10637 2012-02-24 16:36:25Z luigi $"; static int do_abort = 0; @@ -136,6 +136,7 @@ netmap_open(struct my_ring *me, int ringid) bzero(&req, sizeof(req)); strncpy(req.nr_name, me->ifname, sizeof(req.nr_name)); req.nr_ringid = ringid; + req.nr_version = NETMAP_API; err = ioctl(fd, NIOCGINFO, &req); if (err) { D("cannot get info on %s", me->ifname); @@ -162,17 +163,22 @@ netmap_open(struct my_ring *me, int ringid) me->nifp = NETMAP_IF(me->mem, req.nr_offset); me->queueid = ringid; if (ringid & NETMAP_SW_RING) { - me->begin = req.nr_numrings; + me->begin = req.nr_rx_rings; me->end = me->begin + 1; + me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings); + me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings); } else if (ringid & NETMAP_HW_RING) { + D("XXX check multiple threads"); me->begin = ringid & NETMAP_RING_MASK; me->end = me->begin + 1; + me->tx = NETMAP_TXRING(me->nifp, me->begin); + me->rx = NETMAP_RXRING(me->nifp, me->begin); } else { me->begin = 0; - me->end = req.nr_numrings; + me->end = req.nr_rx_rings; // XXX max of the two + me->tx = NETMAP_TXRING(me->nifp, 0); + me->rx = NETMAP_RXRING(me->nifp, 0); } - me->tx = NETMAP_TXRING(me->nifp, me->begin); - me->rx = NETMAP_RXRING(me->nifp, me->begin); return (0); error: close(me->fd); @@ -294,7 +300,7 @@ howmany(struct my_ring *me, int tx) if (0 && verbose && tot && !tx) D("ring %s %s %s has %d avail at %d", me->ifname, tx ? "tx": "rx", - me->end > me->nifp->ni_num_queues ? + me->end > me->nifp->ni_rx_queues ? "host":"net", tot, NETMAP_TXRING(me->nifp, me->begin)->cur); return tot; @@ -392,8 +398,8 @@ main(int argc, char **argv) D("Wait 2 secs for link to come up..."); sleep(2); D("Ready to go, %s 0x%x/%d <-> %s 0x%x/%d.", - me[0].ifname, me[0].queueid, me[0].nifp->ni_num_queues, - me[1].ifname, me[1].queueid, me[1].nifp->ni_num_queues); + me[0].ifname, me[0].queueid, me[0].nifp->ni_rx_queues, + me[1].ifname, me[1].queueid, me[1].nifp->ni_rx_queues); /* main loop */ signal(SIGINT, sigint_h); diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c index f010b839bfb2..2a93e82c7f55 100644 --- a/tools/tools/netmap/pcap.c +++ b/tools/tools/netmap/pcap.c @@ -257,14 +257,14 @@ netmap_open(struct my_ring *me, int ringid) me->nifp = NETMAP_IF(me->mem, req.nr_offset); me->queueid = ringid; if (ringid & NETMAP_SW_RING) { - me->begin = req.nr_numrings; + me->begin = req.nr_rx_rings; me->end = me->begin + 1; } else if (ringid & NETMAP_HW_RING) { me->begin = ringid & NETMAP_RING_MASK; me->end = me->begin + 1; } else { me->begin = 0; - me->end = req.nr_numrings; + me->end = req.nr_rx_rings; } /* request timestamps for packets */ for (i = me->begin; i < me->end; i++) { diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c index 5627e9ea3ab5..43eeda86e159 100644 --- a/tools/tools/netmap/pkt-gen.c +++ b/tools/tools/netmap/pkt-gen.c @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: pkt-gen.c 9827 2011-12-05 11:29:34Z luigi $ + * $Id: pkt-gen.c 10637 2012-02-24 16:36:25Z luigi $ * * Example program to show how to build a multithreaded packet * source/sink using the netmap device. 
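bridge.c above forwards without copying: it exchanges the buffer indices of an rx and a tx slot and flags both slots so the kernel reprograms the NIC rings. The core of that pattern looks like the following sketch of the idea, not a verbatim quote of the tool (assumes <net/netmap.h> and <net/netmap_user.h> are included):

    static void
    swap_slots(struct netmap_ring *rx, struct netmap_ring *tx)
    {
        struct netmap_slot *rs = &rx->slot[rx->cur];
        struct netmap_slot *ts = &tx->slot[tx->cur];
        uint32_t tmp = ts->buf_idx;

        ts->buf_idx = rs->buf_idx;  /* tx sends the rx buffer */
        ts->len = rs->len;
        rs->buf_idx = tmp;          /* rx refilled with the old tx buffer */
        /* both slots now point to different buffers */
        ts->flags |= NS_BUF_CHANGED;
        rs->flags |= NS_BUF_CHANGED;

        tx->cur = NETMAP_RING_NEXT(tx, tx->cur);
        tx->avail--;
        rx->cur = NETMAP_RING_NEXT(rx, rx->cur);
        rx->avail--;
    }
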
@@ -776,6 +776,7 @@ main(int arc, char **argv) } bzero(&nmr, sizeof(nmr)); + nmr.nr_version = NETMAP_API; /* * Open the netmap device to fetch the number of queues of our * interface. @@ -796,11 +797,12 @@ main(int arc, char **argv) D("map size is %d Kb", nmr.nr_memsize >> 10); } bzero(&nmr, sizeof(nmr)); + nmr.nr_version = NETMAP_API; strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name)); if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) { D("Unable to get if info for %s", ifname); } - devqueues = nmr.nr_numrings; + devqueues = nmr.nr_rx_rings; } /* validate provided nthreads. */ @@ -841,6 +843,7 @@ main(int arc, char **argv) * We decide to put the first interface registration here to * give time to cards that take a long time to reset the PHY. */ + nmr.nr_version = NETMAP_API; if (ioctl(fd, NIOCREGIF, &nmr) == -1) { D("Unable to register interface %s", ifname); //continue, fail later @@ -904,6 +907,7 @@ main(int arc, char **argv) bzero(&tifreq, sizeof(tifreq)); strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name)); + tifreq.nr_version = NETMAP_API; tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0; /* @@ -930,7 +934,8 @@ main(int arc, char **argv) targs[i].nmr = tifreq; targs[i].nifp = tnifp; targs[i].qfirst = (g.nthreads > 1) ? i : 0; - targs[i].qlast = (g.nthreads > 1) ? i+1 : tifreq.nr_numrings; + targs[i].qlast = (g.nthreads > 1) ? i+1 : + (td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); targs[i].me = i; targs[i].affinity = g.cpus ? i % g.cpus : -1; if (td_body == sender_body) {
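The per-thread registration pattern that pkt-gen now uses: when running multithreaded, each thread opens its own descriptor and binds it to exactly one hardware ring pair, so no userspace locking is needed. A fragment of that pattern (fd, ifname, i and nthreads are assumed to come from the surrounding code):

    struct nmreq req;

    bzero(&req, sizeof(req));
    strncpy(req.nr_name, ifname, sizeof(req.nr_name));
    req.nr_version = NETMAP_API;    /* now mandatory on every request */
    req.nr_ringid = (nthreads > 1) ?
        (i | NETMAP_HW_RING) :      /* this thread owns hw ring i only */
        0;                          /* single thread: all hardware rings */
    if (ioctl(fd, NIOCREGIF, &req) == -1)
        perror("NIOCREGIF");
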