sfc: replace asynchronous filter operations

Instead of having an efx->type->filter_rfs_insert() method, just use
 workitems with a worker function that calls efx->type->filter_insert().
The only user of this is efx_filter_rfs(), which now queues a call to
 efx_filter_rfs_work().
Similarly, efx_filter_rfs_expire() is now a worker function called on a
 new channel->filter_work work_struct, so the method
 efx->type->filter_rfs_expire_one() is no longer called in atomic context.
 We also add a new mutex efx->rps_mutex to protect the RPS state (efx->
 rps_expire_channel, efx->rps_expire_index, and channel->rps_flow_id) so
 that the taking of efx->filter_lock can be moved to
 efx->type->filter_rfs_expire_one().
Thus, all filter table functions are now called in a sleepable context,
 allowing them to use sleeping locks in a future patch.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Edward Cree 2018-03-27 17:41:59 +01:00 committed by David S. Miller
parent c709002c23
commit 3af0f34290
8 changed files with 124 additions and 208 deletions

View file

@ -4758,143 +4758,6 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
#ifdef CONFIG_RFS_ACCEL
static efx_mcdi_async_completer efx_ef10_filter_rfs_insert_complete;
static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
struct efx_filter_spec *spec)
{
struct efx_ef10_filter_table *table = efx->filter_state;
MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
struct efx_filter_spec *saved_spec;
unsigned int hash, i, depth = 1;
bool replacing = false;
int ins_index = -1;
u64 cookie;
s32 rc;
/* Must be an RX filter without RSS and not for a multicast
* destination address (RFS only works for connected sockets).
* These restrictions allow us to pass only a tiny amount of
* data through to the completion function.
*/
EFX_WARN_ON_PARANOID(spec->flags !=
(EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_SCATTER));
EFX_WARN_ON_PARANOID(spec->priority != EFX_FILTER_PRI_HINT);
EFX_WARN_ON_PARANOID(efx_filter_is_mc_recipient(spec));
hash = efx_ef10_filter_hash(spec);
spin_lock_bh(&efx->filter_lock);
/* Find any existing filter with the same match tuple or else
* a free slot to insert at. If an existing filter is busy,
* we have to give up.
*/
for (;;) {
i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
saved_spec = efx_ef10_filter_entry_spec(table, i);
if (!saved_spec) {
if (ins_index < 0)
ins_index = i;
} else if (efx_ef10_filter_equal(spec, saved_spec)) {
if (table->entry[i].spec & EFX_EF10_FILTER_FLAG_BUSY) {
rc = -EBUSY;
goto fail_unlock;
}
if (spec->priority < saved_spec->priority) {
rc = -EPERM;
goto fail_unlock;
}
ins_index = i;
break;
}
/* Once we reach the maximum search depth, use the
* first suitable slot or return -EBUSY if there was
* none
*/
if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
if (ins_index < 0) {
rc = -EBUSY;
goto fail_unlock;
}
break;
}
++depth;
}
/* Create a software table entry if necessary, and mark it
* busy. We might yet fail to insert, but any attempt to
* insert a conflicting filter while we're waiting for the
* firmware must find the busy entry.
*/
saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
if (saved_spec) {
replacing = true;
} else {
saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
if (!saved_spec) {
rc = -ENOMEM;
goto fail_unlock;
}
*saved_spec = *spec;
}
efx_ef10_filter_set_entry(table, ins_index, saved_spec,
EFX_EF10_FILTER_FLAG_BUSY);
spin_unlock_bh(&efx->filter_lock);
/* Pack up the variables needed on completion */
cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
efx_ef10_filter_push_prep(efx, spec, inbuf,
table->entry[ins_index].handle, NULL,
replacing);
efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
MC_CMD_FILTER_OP_OUT_LEN,
efx_ef10_filter_rfs_insert_complete, cookie);
return ins_index;
fail_unlock:
spin_unlock_bh(&efx->filter_lock);
return rc;
}
static void
efx_ef10_filter_rfs_insert_complete(struct efx_nic *efx, unsigned long cookie,
int rc, efx_dword_t *outbuf,
size_t outlen_actual)
{
struct efx_ef10_filter_table *table = efx->filter_state;
unsigned int ins_index, dmaq_id;
struct efx_filter_spec *spec;
bool replacing;
/* Unpack the cookie */
replacing = cookie >> 31;
ins_index = (cookie >> 16) & (HUNT_FILTER_TBL_ROWS - 1);
dmaq_id = cookie & 0xffff;
spin_lock_bh(&efx->filter_lock);
spec = efx_ef10_filter_entry_spec(table, ins_index);
if (rc == 0) {
table->entry[ins_index].handle =
MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
if (replacing)
spec->dmaq_id = dmaq_id;
} else if (!replacing) {
kfree(spec);
spec = NULL;
}
efx_ef10_filter_set_entry(table, ins_index, spec, 0);
spin_unlock_bh(&efx->filter_lock);
wake_up_all(&table->waitq);
}
static void
efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
unsigned long filter_idx,
@ -4905,18 +4768,22 @@ static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int filter_idx)
{
struct efx_ef10_filter_table *table = efx->filter_state;
struct efx_filter_spec *spec =
efx_ef10_filter_entry_spec(table, filter_idx);
struct efx_filter_spec *spec;
MCDI_DECLARE_BUF(inbuf,
MC_CMD_FILTER_OP_IN_HANDLE_OFST +
MC_CMD_FILTER_OP_IN_HANDLE_LEN);
bool ret = true;
spin_lock_bh(&efx->filter_lock);
spec = efx_ef10_filter_entry_spec(table, filter_idx);
if (!spec ||
(table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) ||
spec->priority != EFX_FILTER_PRI_HINT ||
!rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
flow_id, filter_idx))
return false;
flow_id, filter_idx)) {
ret = false;
goto out_unlock;
}
MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
MC_CMD_FILTER_OP_IN_OP_REMOVE);
@ -4924,10 +4791,12 @@ static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
table->entry[filter_idx].handle);
if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0,
efx_ef10_filter_rfs_expire_complete, filter_idx))
return false;
table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
return true;
ret = false;
else
table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
out_unlock:
spin_unlock_bh(&efx->filter_lock);
return ret;
}
static void
@ -6784,7 +6653,6 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
.filter_rfs_insert = efx_ef10_filter_rfs_insert,
.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
@ -6897,7 +6765,6 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
.filter_rfs_insert = efx_ef10_filter_rfs_insert,
.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD

View file

@ -340,7 +340,10 @@ static int efx_poll(struct napi_struct *napi, int budget)
efx_update_irq_mod(efx, channel);
}
efx_filter_rfs_expire(channel);
#ifdef CONFIG_RFS_ACCEL
/* Perhaps expire some ARFS filters */
schedule_work(&channel->filter_work);
#endif
/* There is no race here; although napi_disable() will
* only wait for napi_complete(), this isn't a problem
@ -470,6 +473,10 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
tx_queue->channel = channel;
}
#ifdef CONFIG_RFS_ACCEL
INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif
rx_queue = &channel->rx_queue;
rx_queue->efx = efx;
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
@ -512,6 +519,9 @@ efx_copy_channel(const struct efx_channel *old_channel)
rx_queue->buffer = NULL;
memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
#ifdef CONFIG_RFS_ACCEL
INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif
return channel;
}
@ -3012,6 +3022,9 @@ static int efx_init_struct(struct efx_nic *efx,
efx->num_mac_stats = MC_CMD_MAC_NSTATS;
BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
mutex_init(&efx->mac_lock);
#ifdef CONFIG_RFS_ACCEL
mutex_init(&efx->rps_mutex);
#endif
efx->phy_op = &efx_dummy_phy_operations;
efx->mdio.dev = net_dev;
INIT_WORK(&efx->mac_work, efx_mac_work);

View file

@ -170,15 +170,18 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota);
static inline void efx_filter_rfs_expire(struct efx_channel *channel)
static inline void efx_filter_rfs_expire(struct work_struct *data)
{
struct efx_channel *channel = container_of(data, struct efx_channel,
filter_work);
if (channel->rfs_filters_added >= 60 &&
__efx_filter_rfs_expire(channel->efx, 100))
channel->rfs_filters_added -= 60;
}
#define efx_filter_rfs_enabled() 1
#else
static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
static inline void efx_filter_rfs_expire(struct work_struct *data) {}
#define efx_filter_rfs_enabled() 0
#endif
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);

View file

@ -2901,28 +2901,25 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
#ifdef CONFIG_RFS_ACCEL
s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
struct efx_filter_spec *gen_spec)
{
return efx_farch_filter_insert(efx, gen_spec, true);
}
bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int index)
{
struct efx_farch_filter_state *state = efx->filter_state;
struct efx_farch_filter_table *table =
&state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
struct efx_farch_filter_table *table;
bool ret = false;
spin_lock_bh(&efx->filter_lock);
table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
if (test_bit(index, table->used_bitmap) &&
table->spec[index].priority == EFX_FILTER_PRI_HINT &&
rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
flow_id, index)) {
efx_farch_filter_table_clear_entry(efx, table, index);
return true;
ret = true;
}
return false;
spin_unlock_bh(&efx->filter_lock);
return ret;
}
#endif /* CONFIG_RFS_ACCEL */

View file

@ -430,6 +430,7 @@ enum efx_sync_events_state {
* @event_test_cpu: Last CPU to handle interrupt or test event for this channel
* @irq_count: Number of IRQs since last adaptive moderation decision
* @irq_mod_score: IRQ moderation score
* @filter_work: Work item for efx_filter_rfs_expire()
* @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
* indexed by filter ID
* @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
@ -475,6 +476,7 @@ struct efx_channel {
unsigned int irq_mod_score;
#ifdef CONFIG_RFS_ACCEL
unsigned int rfs_filters_added;
struct work_struct filter_work;
#define RPS_FLOW_ID_INVALID 0xFFFFFFFF
u32 *rps_flow_id;
#endif
@ -844,6 +846,7 @@ struct efx_rss_context {
* @filter_sem: Filter table rw_semaphore, for freeing the table
* @filter_lock: Filter table lock, for mere content changes
* @filter_state: Architecture-dependent filter table state
* @rps_mutex: Protects RPS state of all channels
* @rps_expire_channel: Next channel to check for expiry
* @rps_expire_index: Next index to check for expiry in
* @rps_expire_channel's @rps_flow_id
@ -998,6 +1001,7 @@ struct efx_nic {
spinlock_t filter_lock;
void *filter_state;
#ifdef CONFIG_RFS_ACCEL
struct mutex rps_mutex;
unsigned int rps_expire_channel;
unsigned int rps_expire_index;
#endif
@ -1152,10 +1156,6 @@ struct efx_udp_tunnel {
* @filter_count_rx_used: Get the number of filters in use at a given priority
* @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
* @filter_get_rx_ids: Get list of RX filters at a given priority
* @filter_rfs_insert: Add or replace a filter for RFS. This must be
* atomic. The hardware change may be asynchronous but should
* not be delayed for long. It may fail if this can't be done
* atomically.
* @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
* This must check whether the specified table entry is used by RFS
* and that rps_may_expire_flow() returns true for it.
@ -1306,8 +1306,6 @@ struct efx_nic_type {
enum efx_filter_priority priority,
u32 *buf, u32 size);
#ifdef CONFIG_RFS_ACCEL
s32 (*filter_rfs_insert)(struct efx_nic *efx,
struct efx_filter_spec *spec);
bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id,
unsigned int index);
#endif

View file

@ -601,8 +601,6 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
enum efx_filter_priority priority, u32 *buf,
u32 size);
#ifdef CONFIG_RFS_ACCEL
s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
struct efx_filter_spec *spec);
bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
unsigned int index);
#endif

View file

@ -827,14 +827,67 @@ MODULE_PARM_DESC(rx_refill_threshold,
#ifdef CONFIG_RFS_ACCEL
/**
* struct efx_async_filter_insertion - Request to asynchronously insert a filter
* @net_dev: Reference to the netdevice
* @spec: The filter to insert
* @work: Workitem for this request
* @rxq_index: Identifies the channel for which this request was made
* @flow_id: Identifies the kernel-side flow for which this request was made
*/
struct efx_async_filter_insertion {
struct net_device *net_dev;
struct efx_filter_spec spec;
struct work_struct work;
u16 rxq_index;
u32 flow_id;
};
static void efx_filter_rfs_work(struct work_struct *data)
{
struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
work);
struct efx_nic *efx = netdev_priv(req->net_dev);
struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
int rc;
rc = efx->type->filter_insert(efx, &req->spec, false);
if (rc >= 0) {
/* Remember this so we can check whether to expire the filter
* later.
*/
mutex_lock(&efx->rps_mutex);
channel->rps_flow_id[rc] = req->flow_id;
++channel->rfs_filters_added;
mutex_unlock(&efx->rps_mutex);
if (req->spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc);
}
/* Release references */
dev_put(req->net_dev);
kfree(req);
}
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_channel *channel;
struct efx_filter_spec spec;
struct efx_async_filter_insertion *req;
struct flow_keys fk;
int rc;
if (flow_id == RPS_FLOW_ID_INVALID)
return -EINVAL;
@ -847,50 +900,39 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
return -EPROTONOSUPPORT;
efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT,
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return -ENOMEM;
efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
spec.match_flags =
req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
spec.ether_type = fk.basic.n_proto;
spec.ip_proto = fk.basic.ip_proto;
req->spec.ether_type = fk.basic.n_proto;
req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
spec.rem_host[0] = fk.addrs.v4addrs.src;
spec.loc_host[0] = fk.addrs.v4addrs.dst;
req->spec.rem_host[0] = fk.addrs.v4addrs.src;
req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
sizeof(struct in6_addr));
memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
sizeof(struct in6_addr));
}
spec.rem_port = fk.ports.src;
spec.loc_port = fk.ports.dst;
req->spec.rem_port = fk.ports.src;
req->spec.loc_port = fk.ports.dst;
rc = efx->type->filter_rfs_insert(efx, &spec);
if (rc < 0)
return rc;
/* Remember this so we can check whether to expire the filter later */
channel = efx_get_channel(efx, rxq_index);
channel->rps_flow_id[rc] = flow_id;
++channel->rfs_filters_added;
if (spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
(spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
ntohs(spec.loc_port), rxq_index, flow_id, rc);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
(spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
ntohs(spec.loc_port), rxq_index, flow_id, rc);
return rc;
dev_hold(req->net_dev = net_dev);
INIT_WORK(&req->work, efx_filter_rfs_work);
req->rxq_index = rxq_index;
req->flow_id = flow_id;
schedule_work(&req->work);
return 0;
}
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
@ -899,9 +941,8 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
unsigned int channel_idx, index, size;
u32 flow_id;
if (!spin_trylock_bh(&efx->filter_lock))
if (!mutex_trylock(&efx->rps_mutex))
return false;
expire_one = efx->type->filter_rfs_expire_one;
channel_idx = efx->rps_expire_channel;
index = efx->rps_expire_index;
@ -926,7 +967,7 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
efx->rps_expire_channel = channel_idx;
efx->rps_expire_index = index;
spin_unlock_bh(&efx->filter_lock);
mutex_unlock(&efx->rps_mutex);
return true;
}

View file

@ -1035,7 +1035,6 @@ const struct efx_nic_type siena_a0_nic_type = {
.filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
.filter_get_rx_ids = efx_farch_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
.filter_rfs_insert = efx_farch_filter_rfs_insert,
.filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD