sfc: support passing a representor to the EF100 TX path

A non-null efv in __ef100_enqueue_skb() indicates that the packet is
 from that representor, should be transmitted with a suitable option
 descriptor (to instruct the switch to deliver it to the representee),
 and should not be accounted to the parent PF's stats or BQL.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Edward Cree 2022-07-20 19:33:47 +01:00 committed by David S. Miller
parent da56552d04
commit 02443ab8c9
7 changed files with 123 additions and 17 deletions

View file

@ -15,6 +15,12 @@
#include "net_driver.h"
/* Software traffic counters backing a representor's netdev stats.
 * All fields are atomic64_t and are declared one per line.
 */
struct efx_rep_sw_stats {
	atomic64_t rx_packets;
	atomic64_t tx_packets;
	atomic64_t rx_bytes;
	atomic64_t tx_bytes;
	atomic64_t rx_dropped;
	/* Incremented by the EF100 TX path each time it rejects a
	 * representor packet with -ENOSPC.
	 */
	atomic64_t tx_errors;
};
/**
* struct efx_rep - Private data for an Efx representor
*
@ -24,6 +30,7 @@
* @mport: m-port ID of corresponding VF
* @idx: VF index
* @list: entry on efx->vf_reps
* @stats: software traffic counters for netdev stats
*/
struct efx_rep {
struct efx_nic *parent;
@ -32,6 +39,7 @@ struct efx_rep {
u32 mport;
unsigned int idx;
struct list_head list;
struct efx_rep_sw_stats stats;
};
int efx_ef100_vfrep_create(struct efx_nic *efx, unsigned int i);

View file

@ -254,7 +254,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
const struct sk_buff *skb,
unsigned int segment_count)
unsigned int segment_count,
struct efx_rep *efv)
{
unsigned int old_write_count = tx_queue->write_count;
unsigned int new_write_count = old_write_count;
@ -272,6 +273,20 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
else
next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND;
if (unlikely(efv)) {
/* Create TX override descriptor */
write_ptr = new_write_count & tx_queue->ptr_mask;
txd = ef100_tx_desc(tx_queue, write_ptr);
++new_write_count;
tx_queue->packet_write_count = new_write_count;
EFX_POPULATE_OWORD_3(*txd,
ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX,
ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport,
ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1);
nr_descs--;
}
/* if it's a raw write (such as XDP) then always SEND single frames */
if (!skb)
nr_descs = 1;
@ -306,6 +321,9 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
/* if it's a raw write (such as XDP) then always SEND */
next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG :
ESE_GZ_TX_DESC_TYPE_SEND;
/* mark as an EFV buffer if applicable */
if (unlikely(efv))
buffer->flags |= EFX_TX_BUF_EFV;
} while (new_write_count != tx_queue->insert_count);
@ -324,7 +342,7 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
void ef100_tx_write(struct efx_tx_queue *tx_queue)
{
ef100_tx_make_descriptors(tx_queue, NULL, 0);
ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL);
ef100_tx_push_buffers(tx_queue);
}
@ -350,6 +368,12 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
* function will free the SKB.
*/
int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	/* No representor is involved on this entry point, so hand the
	 * common enqueue routine a NULL efv.
	 */
	struct efx_rep *efv = NULL;

	return __ef100_enqueue_skb(tx_queue, skb, efv);
}
int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
struct efx_rep *efv)
{
unsigned int old_insert_count = tx_queue->insert_count;
struct efx_nic *efx = tx_queue->efx;
@ -376,16 +400,64 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
return 0;
}
if (unlikely(efv)) {
struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
/* Drop representor packets if the queue is stopped.
* We currently don't assert backoff to representors so this is
* to make sure representor traffic can't starve the main
* net device.
* And, of course, if there are no TX descriptors left.
*/
if (netif_tx_queue_stopped(tx_queue->core_txq) ||
unlikely(efx_tx_buffer_in_use(buffer))) {
atomic64_inc(&efv->stats.tx_errors);
rc = -ENOSPC;
goto err;
}
/* Also drop representor traffic if it could cause us to
* stop the queue. If we assert backoff and we haven't
* received traffic on the main net device recently then the
* TX watchdog can go off erroneously.
*/
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
fill_level += efx_tx_max_skb_descs(efx);
if (fill_level > efx->txq_stop_thresh) {
struct efx_tx_queue *txq2;
/* Refresh cached fill level and re-check */
efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
txq2->old_read_count = READ_ONCE(txq2->read_count);
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
fill_level += efx_tx_max_skb_descs(efx);
if (fill_level > efx->txq_stop_thresh) {
atomic64_inc(&efv->stats.tx_errors);
rc = -ENOSPC;
goto err;
}
}
buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV;
tx_queue->insert_count++;
}
/* Map for DMA and create descriptors */
rc = efx_tx_map_data(tx_queue, skb, segments);
if (rc)
goto err;
ef100_tx_make_descriptors(tx_queue, skb, segments);
ef100_tx_make_descriptors(tx_queue, skb, segments, efv);
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
if (fill_level > efx->txq_stop_thresh) {
struct efx_tx_queue *txq2;
/* Because of checks above, representor traffic should
* not be able to stop the queue.
*/
WARN_ON(efv);
netif_tx_stop_queue(tx_queue->core_txq);
/* Re-read after a memory barrier in case we've raced with
* the completion path. Otherwise there's a danger we'll never
@ -404,8 +476,12 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* If xmit_more then we don't need to push the doorbell, unless there
* are 256 descriptors already queued in which case we have to push to
* ensure we never push more than 256 at once.
*
* Always push for representor traffic, and don't account it to parent
* PF netdevice's BQL.
*/
if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
if (unlikely(efv) ||
__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
tx_queue->write_count - tx_queue->notify_count > 255)
ef100_tx_push_buffers(tx_queue);

View file

@ -13,6 +13,7 @@
#define EFX_EF100_TX_H
#include "net_driver.h"
#include "ef100_rep.h"
int ef100_tx_probe(struct efx_tx_queue *tx_queue);
void ef100_tx_init(struct efx_tx_queue *tx_queue);
@ -22,4 +23,6 @@ unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx);
void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event);
netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
struct efx_rep *efv);
#endif

View file

@ -178,6 +178,7 @@ struct efx_tx_buffer {
#define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */
#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */
#define EFX_TX_BUF_TSO_V3 0x40 /* empty buffer for a TSO_V3 descriptor */
#define EFX_TX_BUF_EFV 0x100 /* buffer was sent from representor */
/**
* struct efx_tx_queue - An Efx TX queue

View file

@ -559,6 +559,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
{
unsigned int pkts_compl = 0, bytes_compl = 0;
unsigned int efv_pkts_compl = 0;
unsigned int read_ptr;
bool finished = false;
@ -580,7 +581,8 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
/* Need to check the flag before dequeueing. */
if (buffer->flags & EFX_TX_BUF_SKB)
finished = true;
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
&efv_pkts_compl);
++tx_queue->read_count;
read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@ -589,7 +591,7 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
tx_queue->pkts_compl += pkts_compl;
tx_queue->bytes_compl += bytes_compl;
EFX_WARN_ON_PARANOID(pkts_compl != 1);
EFX_WARN_ON_PARANOID(pkts_compl + efv_pkts_compl != 1);
efx_xmit_done_check_empty(tx_queue);
}

View file

@ -109,9 +109,11 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
/* Free any buffers left in the ring */
while (tx_queue->read_count != tx_queue->write_count) {
unsigned int pkts_compl = 0, bytes_compl = 0;
unsigned int efv_pkts_compl = 0;
buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
&efv_pkts_compl);
++tx_queue->read_count;
}
@ -146,7 +148,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
struct efx_tx_buffer *buffer,
unsigned int *pkts_compl,
unsigned int *bytes_compl)
unsigned int *bytes_compl,
unsigned int *efv_pkts_compl)
{
if (buffer->unmap_len) {
struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
@ -164,9 +167,15 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
if (buffer->flags & EFX_TX_BUF_SKB) {
struct sk_buff *skb = (struct sk_buff *)buffer->skb;
EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
(*pkts_compl)++;
(*bytes_compl) += skb->len;
if (unlikely(buffer->flags & EFX_TX_BUF_EFV)) {
EFX_WARN_ON_PARANOID(!efv_pkts_compl);
(*efv_pkts_compl)++;
} else {
EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
(*pkts_compl)++;
(*bytes_compl) += skb->len;
}
if (tx_queue->timestamping &&
(tx_queue->completed_timestamp_major ||
tx_queue->completed_timestamp_minor)) {
@ -199,7 +208,8 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
unsigned int index,
unsigned int *pkts_compl,
unsigned int *bytes_compl)
unsigned int *bytes_compl,
unsigned int *efv_pkts_compl)
{
struct efx_nic *efx = tx_queue->efx;
unsigned int stop_index, read_ptr;
@ -218,7 +228,8 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
return;
}
efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl,
efv_pkts_compl);
++tx_queue->read_count;
read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@ -241,15 +252,17 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
unsigned int efv_pkts_compl = 0;
struct efx_nic *efx = tx_queue->efx;
EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl,
&efv_pkts_compl);
tx_queue->pkts_compl += pkts_compl;
tx_queue->bytes_compl += bytes_compl;
if (pkts_compl > 1)
if (pkts_compl + efv_pkts_compl > 1)
++tx_queue->merge_events;
/* See if we need to restart the netif queue. This memory
@ -274,6 +287,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
unsigned int insert_count)
{
unsigned int efv_pkts_compl = 0;
struct efx_tx_buffer *buffer;
unsigned int bytes_compl = 0;
unsigned int pkts_compl = 0;
@ -282,7 +296,8 @@ void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
while (tx_queue->insert_count != insert_count) {
--tx_queue->insert_count;
buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
&efv_pkts_compl);
}
}

View file

@ -19,7 +19,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
struct efx_tx_buffer *buffer,
unsigned int *pkts_compl,
unsigned int *bytes_compl);
unsigned int *bytes_compl,
unsigned int *efv_pkts_compl);
static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
{