Merge branch 'mlxsw-devlink-shared-buffers'

Jiri Pirko says:

====================
devlink + mlxsw: add support for config and control of shared buffers

ASICs implement shared buffer for packet forwarding purposes and enable
flexible partitioning of the shared buffer for different flows and ports,
enabling non-blocking progress of different flows as well as separation
of lossy traffic from loss-less traffic when using Per-Priority Flow
Control (PFC). The shared buffer optimizes the buffer utilization for better
absorption of packet bursts.

This patchset implements an API based on the model that SAI uses. That model
is aligned with multiple ASIC vendors, so this API should be vendor neutral.

Userspace counterpart patchset for devlink iproute2 tool can be found here:
https://github.com/jpirko/iproute2_mlxsw/tree/devlink_sb

A couple of usage examples:

switch$ devlink sb help
Usage: devlink sb show [ DEV [ sb SB_INDEX ] ]
       devlink sb pool show [ DEV [ sb SB_INDEX ] pool POOL_INDEX ]
       devlink sb pool set DEV [ sb SB_INDEX ] pool POOL_INDEX
                           size POOL_SIZE thtype { static | dynamic }
       devlink sb port pool show [ DEV/PORT_INDEX [ sb SB_INDEX ]
                                   pool POOL_INDEX ]
       devlink sb port pool set DEV/PORT_INDEX [ sb SB_INDEX ]
                                pool POOL_INDEX th THRESHOLD
       devlink sb tc bind show [ DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX ]
       devlink sb tc bind set DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX
                              type { ingress | egress } pool POOL_INDEX
                              th THRESHOLD
       devlink sb occupancy show { DEV | DEV/PORT_INDEX } [ sb SB_INDEX ]
       devlink sb occupancy snapshot DEV [ sb SB_INDEX ]
       devlink sb occupancy clearmax DEV [ sb SB_INDEX ]

switch$ devlink sb show
pci/0000:03:00.0: sb 0 size 16777216 ing_pools 4 eg_pools 4 ing_tcs 8 eg_tcs 8

switch$ devlink sb pool show
pci/0000:03:00.0: sb 0 pool 0 type ingress size 12400032 thtype dynamic
pci/0000:03:00.0: sb 0 pool 1 type ingress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 2 type ingress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 3 type ingress size 200064 thtype dynamic
pci/0000:03:00.0: sb 0 pool 4 type egress size 13220064 thtype dynamic
pci/0000:03:00.0: sb 0 pool 5 type egress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 6 type egress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 7 type egress size 0 thtype dynamic

switch$ devlink sb port pool show sw0p7 pool 0
sw0p7: sb 0 pool 0 threshold 16

switch$ sudo devlink sb port pool set sw0p7 pool 0 th 15

switch$ devlink sb port pool show sw0p7 pool 0
sw0p7: sb 0 pool 0 threshold 15

switch$ devlink sb tc bind show sw0p7 tc 0 type ingress
sw0p7: sb 0 tc 0 type ingress pool 0 threshold 10

switch$ sudo devlink sb tc bind set sw0p7 tc 0 type ingress pool 0 th 9

switch$ devlink sb tc bind show sw0p7 tc 0 type ingress
sw0p7: sb 0 tc 0 type ingress pool 0 threshold 9

switch$ sudo devlink sb occupancy snapshot pci/0000:03:00.0

switch$ devlink sb occupancy show sw0p7
sw0p7:
  pool: 0:      82944/3217344 1:          0/0       2:          0/0       3:          0/0
        4:          0/384     5:          0/0       6:          0/0       7:          0/0
  itc:  0(0):   96768/3217344 1(0):       0/0       2(0):       0/0       3(0):       0/0
        4(0):       0/0       5(0):       0/0       6(0):       0/0       7(0):       0/0
  etc:  0(4):       0/384     1(4):       0/0       2(4):       0/0       3(4):       0/0
        4(4):       0/0       5(4):       0/0       6(4):       0/0       7(4):       0/0

switch$ sudo devlink sb occupancy clearmax pci/0000:03:00.0
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2016-04-14 16:22:12 -04:00
commit cb689269ac
10 changed files with 2691 additions and 426 deletions

View file

@ -44,7 +44,7 @@
#include <linux/seq_file.h>
#include <linux/u64_stats_sync.h>
#include <linux/netdevice.h>
#include <linux/wait.h>
#include <linux/completion.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/types.h>
@ -55,6 +55,7 @@
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <asm/byteorder.h>
#include <net/devlink.h>
@ -73,6 +74,8 @@ static const char mlxsw_core_driver_name[] = "mlxsw_core";
static struct dentry *mlxsw_core_dbg_root;
static struct workqueue_struct *mlxsw_wq;
struct mlxsw_core_pcpu_stats {
u64 trap_rx_packets[MLXSW_TRAP_ID_MAX];
u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX];
@ -93,11 +96,9 @@ struct mlxsw_core {
struct list_head rx_listener_list;
struct list_head event_listener_list;
struct {
struct sk_buff *resp_skb;
u64 tid;
wait_queue_head_t wait;
bool trans_active;
struct mutex lock; /* One EMAD transaction at a time. */
atomic64_t tid;
struct list_head trans_list;
spinlock_t trans_list_lock; /* protects trans_list writes */
bool use_emad;
} emad;
struct mlxsw_core_pcpu_stats __percpu *pcpu_stats;
@ -290,7 +291,7 @@ static void mlxsw_emad_pack_reg_tlv(char *reg_tlv,
static void mlxsw_emad_pack_op_tlv(char *op_tlv,
const struct mlxsw_reg_info *reg,
enum mlxsw_core_reg_access_type type,
struct mlxsw_core *mlxsw_core)
u64 tid)
{
mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP);
mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN);
@ -306,7 +307,7 @@ static void mlxsw_emad_pack_op_tlv(char *op_tlv,
MLXSW_EMAD_OP_TLV_METHOD_WRITE);
mlxsw_emad_op_tlv_class_set(op_tlv,
MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS);
mlxsw_emad_op_tlv_tid_set(op_tlv, mlxsw_core->emad.tid);
mlxsw_emad_op_tlv_tid_set(op_tlv, tid);
}
static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb)
@ -328,7 +329,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb,
const struct mlxsw_reg_info *reg,
char *payload,
enum mlxsw_core_reg_access_type type,
struct mlxsw_core *mlxsw_core)
u64 tid)
{
char *buf;
@ -339,7 +340,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb,
mlxsw_emad_pack_reg_tlv(buf, reg, payload);
buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32));
mlxsw_emad_pack_op_tlv(buf, reg, type, mlxsw_core);
mlxsw_emad_pack_op_tlv(buf, reg, type, tid);
mlxsw_emad_construct_eth_hdr(skb);
}
@ -376,58 +377,16 @@ static bool mlxsw_emad_is_resp(const struct sk_buff *skb)
return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE);
}
#define MLXSW_EMAD_TIMEOUT_MS 200
static int __mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info)
static int mlxsw_emad_process_status(char *op_tlv,
enum mlxsw_emad_op_tlv_status *p_status)
{
int err;
int ret;
*p_status = mlxsw_emad_op_tlv_status_get(op_tlv);
mlxsw_core->emad.trans_active = true;
err = mlxsw_core_skb_transmit(mlxsw_core, skb, tx_info);
if (err) {
dev_err(mlxsw_core->bus_info->dev, "Failed to transmit EMAD (tid=%llx)\n",
mlxsw_core->emad.tid);
dev_kfree_skb(skb);
goto trans_inactive_out;
}
ret = wait_event_timeout(mlxsw_core->emad.wait,
!(mlxsw_core->emad.trans_active),
msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS));
if (!ret) {
dev_warn(mlxsw_core->bus_info->dev, "EMAD timed-out (tid=%llx)\n",
mlxsw_core->emad.tid);
err = -EIO;
goto trans_inactive_out;
}
return 0;
trans_inactive_out:
mlxsw_core->emad.trans_active = false;
return err;
}
static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core,
char *op_tlv)
{
enum mlxsw_emad_op_tlv_status status;
u64 tid;
status = mlxsw_emad_op_tlv_status_get(op_tlv);
tid = mlxsw_emad_op_tlv_tid_get(op_tlv);
switch (status) {
switch (*p_status) {
case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS:
return 0;
case MLXSW_EMAD_OP_TLV_STATUS_BUSY:
case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK:
dev_warn(mlxsw_core->bus_info->dev, "Reg access status again (tid=%llx,status=%x(%s))\n",
tid, status, mlxsw_emad_op_tlv_status_str(status));
return -EAGAIN;
case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED:
case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV:
@ -438,70 +397,150 @@ static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core,
case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE:
case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR:
default:
dev_err(mlxsw_core->bus_info->dev, "Reg access status failed (tid=%llx,status=%x(%s))\n",
tid, status, mlxsw_emad_op_tlv_status_str(status));
return -EIO;
}
}
static int mlxsw_emad_process_status_skb(struct mlxsw_core *mlxsw_core,
struct sk_buff *skb)
static int
mlxsw_emad_process_status_skb(struct sk_buff *skb,
enum mlxsw_emad_op_tlv_status *p_status)
{
return mlxsw_emad_process_status(mlxsw_core, mlxsw_emad_op_tlv(skb));
return mlxsw_emad_process_status(mlxsw_emad_op_tlv(skb), p_status);
}
/* State of a single in-flight EMAD register transaction. A transaction is
 * linked on the core's list of pending transactions and on the bulk list
 * supplied by the caller, which later waits on it.
 */
struct mlxsw_reg_trans {
/* Entry in mlxsw_core->emad.trans_list (added/removed with RCU list ops). */
struct list_head list;
/* Entry in the caller-supplied bulk list. */
struct list_head bulk_list;
struct mlxsw_core *core;
/* Constructed EMAD; a copy of it is sent on every (re)transmission. */
struct sk_buff *tx_skb;
struct mlxsw_tx_info tx_info;
/* Delayed work that runs mlxsw_emad_trans_timeout_work(). */
struct delayed_work timeout_dw;
/* Number of retransmissions performed so far. */
unsigned int retries;
/* Transaction ID; received responses are matched against it. */
u64 tid;
/* Completed when the transaction is finished. */
struct completion completion;
/* Set to 1 on transmit. The response path and the timeout path each
 * decrement it and proceed only if it reached zero.
 */
atomic_t active;
/* Optional callback invoked with the response payload on success. */
mlxsw_reg_trans_cb_t *cb;
unsigned long cb_priv;
const struct mlxsw_reg_info *reg;
enum mlxsw_core_reg_access_type type;
/* Final result, stored when the transaction is finished. */
int err;
/* Status taken from the response's operation TLV. */
enum mlxsw_emad_op_tlv_status emad_status;
/* Used to free the transaction through kfree_rcu(). */
struct rcu_head rcu;
};
#define MLXSW_EMAD_TIMEOUT_MS 200
/* Queue the transaction's timeout work with a delay of
 * MLXSW_EMAD_TIMEOUT_MS milliseconds.
 */
static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans)
{
	mlxsw_core_schedule_dw(&trans->timeout_dw,
			       msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS));
}
static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info)
struct mlxsw_reg_trans *trans)
{
struct sk_buff *trans_skb;
int n_retry;
struct sk_buff *skb;
int err;
n_retry = 0;
retry:
/* We copy the EMAD to a new skb, since we might need
* to retransmit it in case of failure.
*/
trans_skb = skb_copy(skb, GFP_KERNEL);
if (!trans_skb) {
err = -ENOMEM;
goto out;
skb = skb_copy(trans->tx_skb, GFP_KERNEL);
if (!skb)
return -ENOMEM;
atomic_set(&trans->active, 1);
err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info);
if (err) {
dev_kfree_skb(skb);
return err;
}
err = __mlxsw_emad_transmit(mlxsw_core, trans_skb, tx_info);
if (!err) {
struct sk_buff *resp_skb = mlxsw_core->emad.resp_skb;
err = mlxsw_emad_process_status_skb(mlxsw_core, resp_skb);
if (err)
dev_kfree_skb(resp_skb);
if (!err || err != -EAGAIN)
goto out;
}
if (n_retry++ < MLXSW_EMAD_MAX_RETRY)
goto retry;
out:
dev_kfree_skb(skb);
mlxsw_core->emad.tid++;
return err;
mlxsw_emad_trans_timeout_schedule(trans);
return 0;
}
/* Finish a transaction with result @err: release the saved EMAD skb, unlink
 * the transaction from the core's pending list, record the result and
 * complete the transaction's completion.
 */
static void mlxsw_emad_trans_finish(struct mlxsw_reg_trans *trans, int err)
{
struct mlxsw_core *mlxsw_core = trans->core;
dev_kfree_skb(trans->tx_skb);
/* Writers of trans_list are serialized by trans_list_lock. */
spin_lock_bh(&mlxsw_core->emad.trans_list_lock);
list_del_rcu(&trans->list);
spin_unlock_bh(&mlxsw_core->emad.trans_list_lock);
/* Store the result before signalling the completion. */
trans->err = err;
complete(&trans->completion);
}
/* Retransmit the transaction's EMAD unless MLXSW_EMAD_MAX_RETRY
 * retransmissions were already made. If the retry budget is exhausted
 * (-EIO) or the retransmission itself fails, the transaction is finished
 * with that error.
 */
static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core,
				      struct mlxsw_reg_trans *trans)
{
	int rc;

	if (trans->retries >= MLXSW_EMAD_MAX_RETRY) {
		mlxsw_emad_trans_finish(trans, -EIO);
		return;
	}

	trans->retries++;
	rc = mlxsw_emad_transmit(trans->core, trans);
	if (rc)
		mlxsw_emad_trans_finish(trans, rc);
}
/* Timeout handler of a transaction's delayed work. Triggers a retry only
 * when this path is the one that drops the transaction's 'active' count
 * to zero.
 */
static void mlxsw_emad_trans_timeout_work(struct work_struct *work)
{
	struct delayed_work *dwork = container_of(work, struct delayed_work,
						  work);
	struct mlxsw_reg_trans *trans = container_of(dwork,
						     struct mlxsw_reg_trans,
						     timeout_dw);

	if (atomic_dec_and_test(&trans->active))
		mlxsw_emad_transmit_retry(trans->core, trans);
}
/* Handle a response EMAD that matched @trans. Nothing is done unless this
 * path drops the transaction's 'active' count to zero. A status that maps
 * to -EAGAIN triggers a retransmission; any other outcome finishes the
 * transaction, after handing the register payload to the transaction's
 * callback (if one is set) when the status is success.
 */
static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core,
					struct mlxsw_reg_trans *trans,
					struct sk_buff *skb)
{
	int rc;

	if (!atomic_dec_and_test(&trans->active))
		return;

	rc = mlxsw_emad_process_status_skb(skb, &trans->emad_status);
	if (rc == -EAGAIN) {
		mlxsw_emad_transmit_retry(mlxsw_core, trans);
		return;
	}

	if (!rc) {
		char *op_tlv = mlxsw_emad_op_tlv(skb);

		if (trans->cb)
			trans->cb(mlxsw_core,
				  mlxsw_emad_reg_payload(op_tlv),
				  trans->reg->len, trans->cb_priv);
	}
	mlxsw_emad_trans_finish(trans, rc);
}
/* called with rcu read lock held */
static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port,
void *priv)
{
struct mlxsw_core *mlxsw_core = priv;
struct mlxsw_reg_trans *trans;
if (mlxsw_emad_is_resp(skb) &&
mlxsw_core->emad.trans_active &&
mlxsw_emad_get_tid(skb) == mlxsw_core->emad.tid) {
mlxsw_core->emad.resp_skb = skb;
mlxsw_core->emad.trans_active = false;
wake_up(&mlxsw_core->emad.wait);
} else {
dev_kfree_skb(skb);
if (!mlxsw_emad_is_resp(skb))
goto free_skb;
list_for_each_entry_rcu(trans, &mlxsw_core->emad.trans_list, list) {
if (mlxsw_emad_get_tid(skb) == trans->tid) {
mlxsw_emad_process_response(mlxsw_core, trans, skb);
break;
}
}
free_skb:
dev_kfree_skb(skb);
}
static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = {
@ -528,18 +567,19 @@ static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core)
static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
{
u64 tid;
int err;
/* Set the upper 32 bits of the transaction ID field to a random
* number. This allows us to discard EMADs addressed to other
* devices.
*/
get_random_bytes(&mlxsw_core->emad.tid, 4);
mlxsw_core->emad.tid = mlxsw_core->emad.tid << 32;
get_random_bytes(&tid, 4);
tid <<= 32;
atomic64_set(&mlxsw_core->emad.tid, tid);
init_waitqueue_head(&mlxsw_core->emad.wait);
mlxsw_core->emad.trans_active = false;
mutex_init(&mlxsw_core->emad.lock);
INIT_LIST_HEAD(&mlxsw_core->emad.trans_list);
spin_lock_init(&mlxsw_core->emad.trans_list_lock);
err = mlxsw_core_rx_listener_register(mlxsw_core,
&mlxsw_emad_rx_listener,
@ -597,6 +637,59 @@ static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core,
return skb;
}
/* Set up @trans for an EMAD access of register @reg and send the first
 * transmission.
 *
 * The transaction is appended to @bulk_list and to the core's list of
 * pending transactions. @cb (may be NULL) and @cb_priv are stored for the
 * response path; @tid is the transaction ID packed into the EMAD.
 *
 * Returns 0 once the EMAD was handed to the transmit path, -ENOMEM if the
 * EMAD skb could not be allocated, or the error returned by
 * mlxsw_emad_transmit(). On a transmit failure the transaction is unlinked
 * from both lists and its skb is freed. This function never frees @trans
 * itself.
 */
static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core,
				 const struct mlxsw_reg_info *reg,
				 char *payload,
				 enum mlxsw_core_reg_access_type type,
				 struct mlxsw_reg_trans *trans,
				 struct list_head *bulk_list,
				 mlxsw_reg_trans_cb_t *cb,
				 unsigned long cb_priv, u64 tid)
{
	struct sk_buff *skb;
	int err;

	/* Log the @tid argument: trans->tid is only assigned further down,
	 * so reading it here would print a stale value.
	 */
	dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n",
		tid, reg->id, mlxsw_reg_id_str(reg->id),
		mlxsw_core_reg_access_type_str(type));

	skb = mlxsw_emad_alloc(mlxsw_core, reg->len);
	if (!skb)
		return -ENOMEM;

	list_add_tail(&trans->bulk_list, bulk_list);
	trans->core = mlxsw_core;
	trans->tx_skb = skb;
	trans->tx_info.local_port = MLXSW_PORT_CPU_PORT;
	trans->tx_info.is_emad = true;
	INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work);
	trans->tid = tid;
	init_completion(&trans->completion);
	trans->cb = cb;
	trans->cb_priv = cb_priv;
	trans->reg = reg;
	trans->type = type;

	mlxsw_emad_construct(skb, reg, payload, type, trans->tid);
	mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info);

	/* Publish the transaction before transmitting so that the response
	 * path can find it by transaction ID.
	 */
	spin_lock_bh(&mlxsw_core->emad.trans_list_lock);
	list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list);
	spin_unlock_bh(&mlxsw_core->emad.trans_list_lock);

	err = mlxsw_emad_transmit(mlxsw_core, trans);
	if (err)
		goto err_out;
	return 0;

err_out:
	spin_lock_bh(&mlxsw_core->emad.trans_list_lock);
	list_del_rcu(&trans->list);
	spin_unlock_bh(&mlxsw_core->emad.trans_list_lock);
	list_del(&trans->bulk_list);
	dev_kfree_skb(trans->tx_skb);
	return err;
}
/*****************
* Core functions
*****************/
@ -686,24 +779,6 @@ static const struct file_operations mlxsw_core_rx_stats_dbg_ops = {
.llseek = seq_lseek
};
/* Debug-print @buf as big-endian 32-bit words, four words per line, each
 * line prefixed with its byte offset. Trailing all-zero words are not
 * printed, but at least one line is always emitted.
 */
static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
const char *buf, size_t size)
{
__be32 *m = (__be32 *) buf;
int i;
int count = size / sizeof(__be32);
/* Find the last non-zero word. */
for (i = count - 1; i >= 0; i--)
if (m[i])
break;
i++;
count = i ? i : 1;
/* NOTE(review): each row reads m[i] .. m[i + 3], so the last row can read
 * up to three words past 'count'. This looks like it relies on @size being
 * a multiple of 16 bytes - confirm against callers.
 */
for (i = 0; i < count; i += 4)
dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n",
i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]),
be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3]));
}
int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver)
{
spin_lock(&mlxsw_core_driver_list_lock);
@ -816,9 +891,168 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink,
return mlxsw_core->driver->port_unsplit(mlxsw_core, port_index);
}
/* devlink sb_pool_get op: forward to the driver's sb_pool_get callback.
 * Returns -EOPNOTSUPP when the driver does not provide one.
 */
static int
mlxsw_devlink_sb_pool_get(struct devlink *devlink,
			  unsigned int sb_index, u16 pool_index,
			  struct devlink_sb_pool_info *pool_info)
{
	struct mlxsw_core *core = devlink_priv(devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_pool_get)
		return drv->sb_pool_get(core, sb_index, pool_index,
					pool_info);
	return -EOPNOTSUPP;
}
/* devlink sb_pool_set op: forward to the driver's sb_pool_set callback.
 * Returns -EOPNOTSUPP when the driver does not provide one.
 */
static int
mlxsw_devlink_sb_pool_set(struct devlink *devlink,
			  unsigned int sb_index, u16 pool_index, u32 size,
			  enum devlink_sb_threshold_type threshold_type)
{
	struct mlxsw_core *core = devlink_priv(devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_pool_set)
		return drv->sb_pool_set(core, sb_index, pool_index, size,
					threshold_type);
	return -EOPNOTSUPP;
}
/* Map a devlink port to the mlxsw_core_port structure that embeds it. */
static void *__dl_port(struct devlink_port *devlink_port)
{
	struct mlxsw_core_port *core_port;

	core_port = container_of(devlink_port, struct mlxsw_core_port,
				 devlink_port);
	return core_port;
}
/* devlink sb_port_pool_get op: forward to the driver's sb_port_pool_get
 * callback, passing the core port that embeds @devlink_port.
 * Returns -EOPNOTSUPP when the driver does not provide the callback.
 */
static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
					  unsigned int sb_index, u16 pool_index,
					  u32 *p_threshold)
{
	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_port_pool_get)
		return drv->sb_port_pool_get(__dl_port(devlink_port),
					     sb_index, pool_index,
					     p_threshold);
	return -EOPNOTSUPP;
}
/* devlink sb_port_pool_set op: forward to the driver's sb_port_pool_set
 * callback, passing the core port that embeds @devlink_port.
 * Returns -EOPNOTSUPP when the driver does not provide the callback.
 */
static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port,
					  unsigned int sb_index, u16 pool_index,
					  u32 threshold)
{
	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_port_pool_set)
		return drv->sb_port_pool_set(__dl_port(devlink_port),
					     sb_index, pool_index, threshold);
	return -EOPNOTSUPP;
}
/* devlink sb_tc_pool_bind_get op: forward to the driver's
 * sb_tc_pool_bind_get callback, passing the core port that embeds
 * @devlink_port. Returns -EOPNOTSUPP when the driver does not provide the
 * callback.
 */
static int
mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port,
				  unsigned int sb_index, u16 tc_index,
				  enum devlink_sb_pool_type pool_type,
				  u16 *p_pool_index, u32 *p_threshold)
{
	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_tc_pool_bind_get)
		return drv->sb_tc_pool_bind_get(__dl_port(devlink_port),
						sb_index, tc_index, pool_type,
						p_pool_index, p_threshold);
	return -EOPNOTSUPP;
}
/* devlink sb_tc_pool_bind_set op: forward to the driver's
 * sb_tc_pool_bind_set callback, passing the core port that embeds
 * @devlink_port. Returns -EOPNOTSUPP when the driver does not provide the
 * callback.
 */
static int
mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
				  unsigned int sb_index, u16 tc_index,
				  enum devlink_sb_pool_type pool_type,
				  u16 pool_index, u32 threshold)
{
	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_tc_pool_bind_set)
		return drv->sb_tc_pool_bind_set(__dl_port(devlink_port),
						sb_index, tc_index, pool_type,
						pool_index, threshold);
	return -EOPNOTSUPP;
}
/* devlink sb_occ_snapshot op: forward to the driver's sb_occ_snapshot
 * callback. Returns -EOPNOTSUPP when the driver does not provide one.
 */
static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink,
					 unsigned int sb_index)
{
	struct mlxsw_core *core = devlink_priv(devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_occ_snapshot)
		return drv->sb_occ_snapshot(core, sb_index);
	return -EOPNOTSUPP;
}
/* devlink sb_occ_max_clear op: forward to the driver's sb_occ_max_clear
 * callback. Returns -EOPNOTSUPP when the driver does not provide one.
 */
static int mlxsw_devlink_sb_occ_max_clear(struct devlink *devlink,
					  unsigned int sb_index)
{
	struct mlxsw_core *core = devlink_priv(devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_occ_max_clear)
		return drv->sb_occ_max_clear(core, sb_index);
	return -EOPNOTSUPP;
}
/* devlink sb_occ_port_pool_get op: forward to the driver's
 * sb_occ_port_pool_get callback, passing the core port that embeds
 * @devlink_port. Returns -EOPNOTSUPP when the driver does not provide the
 * callback.
 */
static int
mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port,
				   unsigned int sb_index, u16 pool_index,
				   u32 *p_cur, u32 *p_max)
{
	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_occ_port_pool_get)
		return drv->sb_occ_port_pool_get(__dl_port(devlink_port),
						 sb_index, pool_index,
						 p_cur, p_max);
	return -EOPNOTSUPP;
}
/* devlink sb_occ_tc_port_bind_get op: forward to the driver's
 * sb_occ_tc_port_bind_get callback, passing the core port that embeds
 * @devlink_port. Returns -EOPNOTSUPP when the driver does not provide the
 * callback.
 */
static int
mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
				      unsigned int sb_index, u16 tc_index,
				      enum devlink_sb_pool_type pool_type,
				      u32 *p_cur, u32 *p_max)
{
	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
	struct mlxsw_driver *drv = core->driver;

	if (drv->sb_occ_tc_port_bind_get)
		return drv->sb_occ_tc_port_bind_get(__dl_port(devlink_port),
						    sb_index, tc_index,
						    pool_type, p_cur, p_max);
	return -EOPNOTSUPP;
}
static const struct devlink_ops mlxsw_devlink_ops = {
.port_split = mlxsw_devlink_port_split,
.port_unsplit = mlxsw_devlink_port_unsplit,
.port_split = mlxsw_devlink_port_split,
.port_unsplit = mlxsw_devlink_port_unsplit,
.sb_pool_get = mlxsw_devlink_sb_pool_get,
.sb_pool_set = mlxsw_devlink_sb_pool_set,
.sb_port_pool_get = mlxsw_devlink_sb_port_pool_get,
.sb_port_pool_set = mlxsw_devlink_sb_port_pool_set,
.sb_tc_pool_bind_get = mlxsw_devlink_sb_tc_pool_bind_get,
.sb_tc_pool_bind_set = mlxsw_devlink_sb_tc_pool_bind_set,
.sb_occ_snapshot = mlxsw_devlink_sb_occ_snapshot,
.sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get,
};
int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
@ -1102,56 +1336,112 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
}
EXPORT_SYMBOL(mlxsw_core_event_listener_unregister);
/* Atomically increment the core's EMAD transaction ID counter and return
 * the new value.
 */
static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core)
{
	u64 next_tid = atomic64_inc_return(&mlxsw_core->emad.tid);

	return next_tid;
}
static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg,
char *payload,
enum mlxsw_core_reg_access_type type)
enum mlxsw_core_reg_access_type type,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb,
unsigned long cb_priv)
{
u64 tid = mlxsw_core_tid_get(mlxsw_core);
struct mlxsw_reg_trans *trans;
int err;
char *op_tlv;
struct sk_buff *skb;
struct mlxsw_tx_info tx_info = {
.local_port = MLXSW_PORT_CPU_PORT,
.is_emad = true,
};
skb = mlxsw_emad_alloc(mlxsw_core, reg->len);
if (!skb)
trans = kzalloc(sizeof(*trans), GFP_KERNEL);
if (!trans)
return -ENOMEM;
mlxsw_emad_construct(skb, reg, payload, type, mlxsw_core);
mlxsw_core->driver->txhdr_construct(skb, &tx_info);
dev_dbg(mlxsw_core->bus_info->dev, "EMAD send (tid=%llx)\n",
mlxsw_core->emad.tid);
mlxsw_core_buf_dump_dbg(mlxsw_core, skb->data, skb->len);
err = mlxsw_emad_transmit(mlxsw_core, skb, &tx_info);
if (!err) {
op_tlv = mlxsw_emad_op_tlv(mlxsw_core->emad.resp_skb);
memcpy(payload, mlxsw_emad_reg_payload(op_tlv),
reg->len);
dev_dbg(mlxsw_core->bus_info->dev, "EMAD recv (tid=%llx)\n",
mlxsw_core->emad.tid - 1);
mlxsw_core_buf_dump_dbg(mlxsw_core,
mlxsw_core->emad.resp_skb->data,
mlxsw_core->emad.resp_skb->len);
dev_kfree_skb(mlxsw_core->emad.resp_skb);
err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans,
bulk_list, cb, cb_priv, tid);
if (err) {
kfree(trans);
return err;
}
return 0;
}
/* Start an EMAD query transaction for register @reg without waiting for
 * its completion. The transaction is appended to @bulk_list; the caller
 * waits on that list with mlxsw_reg_trans_bulk_wait(). @cb (may be NULL)
 * is called with @cb_priv and the response payload on success.
 * Returns 0 if the transaction was started, a negative errno otherwise.
 */
int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv)
{
return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload,
MLXSW_CORE_REG_ACCESS_TYPE_QUERY,
bulk_list, cb, cb_priv);
}
EXPORT_SYMBOL(mlxsw_reg_trans_query);
/* Start an EMAD write transaction for register @reg without waiting for
 * its completion. The transaction is appended to @bulk_list; the caller
 * waits on that list with mlxsw_reg_trans_bulk_wait(). @cb (may be NULL)
 * is called with @cb_priv and the response payload on success.
 * Returns 0 if the transaction was started, a negative errno otherwise.
 */
int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv)
{
return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload,
MLXSW_CORE_REG_ACCESS_TYPE_WRITE,
bulk_list, cb, cb_priv);
}
EXPORT_SYMBOL(mlxsw_reg_trans_write);
/* Wait for one transaction to finish, log its outcome, unlink it from the
 * bulk list and free it. Returns the transaction's final error code.
 */
static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans)
{
struct mlxsw_core *mlxsw_core = trans->core;
int err;
wait_for_completion(&trans->completion);
/* Make sure the timeout work is no longer pending or running before the
 * transaction is freed.
 */
cancel_delayed_work_sync(&trans->timeout_dw);
err = trans->err;
if (trans->retries)
dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n",
trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid);
if (err)
dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n",
trans->tid, trans->reg->id,
mlxsw_reg_id_str(trans->reg->id),
mlxsw_core_reg_access_type_str(trans->type),
trans->emad_status,
mlxsw_emad_op_tlv_status_str(trans->emad_status));
list_del(&trans->bulk_list);
/* Freed via RCU: the transaction was on an RCU-traversed list. */
kfree_rcu(trans, rcu);
return err;
}
/* Wait for every transaction on @bulk_list, in list order. All
 * transactions are waited for even after a failure; the return value is
 * the first non-zero error encountered, or 0 if all succeeded.
 */
int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list)
{
	struct mlxsw_reg_trans *cur, *next;
	int first_err = 0;

	list_for_each_entry_safe(cur, next, bulk_list, bulk_list) {
		int err = mlxsw_reg_trans_wait(cur);

		/* Only the first failure is reported to the caller. */
		if (!first_err && err)
			first_err = err;
	}
	return first_err;
}
EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait);
static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg,
char *payload,
enum mlxsw_core_reg_access_type type)
{
enum mlxsw_emad_op_tlv_status status;
int err, n_retry;
char *in_mbox, *out_mbox, *tmp;
dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n",
reg->id, mlxsw_reg_id_str(reg->id),
mlxsw_core_reg_access_type_str(type));
in_mbox = mlxsw_cmd_mbox_alloc();
if (!in_mbox)
return -ENOMEM;
@ -1162,7 +1452,8 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
goto free_in_mbox;
}
mlxsw_emad_pack_op_tlv(in_mbox, reg, type, mlxsw_core);
mlxsw_emad_pack_op_tlv(in_mbox, reg, type,
mlxsw_core_tid_get(mlxsw_core));
tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32);
mlxsw_emad_pack_reg_tlv(tmp, reg, payload);
@ -1170,60 +1461,61 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
retry:
err = mlxsw_cmd_access_reg(mlxsw_core, in_mbox, out_mbox);
if (!err) {
err = mlxsw_emad_process_status(mlxsw_core, out_mbox);
if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY)
goto retry;
err = mlxsw_emad_process_status(out_mbox, &status);
if (err) {
if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY)
goto retry;
dev_err(mlxsw_core->bus_info->dev, "Reg cmd access status failed (status=%x(%s))\n",
status, mlxsw_emad_op_tlv_status_str(status));
}
}
if (!err)
memcpy(payload, mlxsw_emad_reg_payload(out_mbox),
reg->len);
mlxsw_core->emad.tid++;
mlxsw_cmd_mbox_free(out_mbox);
free_in_mbox:
mlxsw_cmd_mbox_free(in_mbox);
if (err)
dev_err(mlxsw_core->bus_info->dev, "Reg cmd access failed (reg_id=%x(%s),type=%s)\n",
reg->id, mlxsw_reg_id_str(reg->id),
mlxsw_core_reg_access_type_str(type));
return err;
}
/* Transaction callback: copy @payload_len bytes of the response payload
 * into the buffer whose address was passed as @cb_priv.
 */
static void mlxsw_core_reg_access_cb(struct mlxsw_core *mlxsw_core,
				     char *payload, size_t payload_len,
				     unsigned long cb_priv)
{
	memcpy((char *) cb_priv, payload, payload_len);
}
static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg,
char *payload,
enum mlxsw_core_reg_access_type type)
{
u64 cur_tid;
LIST_HEAD(bulk_list);
int err;
if (mutex_lock_interruptible(&mlxsw_core->emad.lock)) {
dev_err(mlxsw_core->bus_info->dev, "Reg access interrupted (reg_id=%x(%s),type=%s)\n",
reg->id, mlxsw_reg_id_str(reg->id),
mlxsw_core_reg_access_type_str(type));
return -EINTR;
}
cur_tid = mlxsw_core->emad.tid;
dev_dbg(mlxsw_core->bus_info->dev, "Reg access (tid=%llx,reg_id=%x(%s),type=%s)\n",
cur_tid, reg->id, mlxsw_reg_id_str(reg->id),
mlxsw_core_reg_access_type_str(type));
/* During initialization EMAD interface is not available to us,
* so we default to command interface. We switch to EMAD interface
* after setting the appropriate traps.
*/
if (!mlxsw_core->emad.use_emad)
err = mlxsw_core_reg_access_cmd(mlxsw_core, reg,
payload, type);
else
err = mlxsw_core_reg_access_emad(mlxsw_core, reg,
return mlxsw_core_reg_access_cmd(mlxsw_core, reg,
payload, type);
err = mlxsw_core_reg_access_emad(mlxsw_core, reg,
payload, type, &bulk_list,
mlxsw_core_reg_access_cb,
(unsigned long) payload);
if (err)
dev_err(mlxsw_core->bus_info->dev, "Reg access failed (tid=%llx,reg_id=%x(%s),type=%s)\n",
cur_tid, reg->id, mlxsw_reg_id_str(reg->id),
mlxsw_core_reg_access_type_str(type));
mutex_unlock(&mlxsw_core->emad.lock);
return err;
return err;
return mlxsw_reg_trans_bulk_wait(&bulk_list);
}
int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
@ -1374,6 +1666,24 @@ void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port)
}
EXPORT_SYMBOL(mlxsw_core_port_fini);
/* Debug-print @buf as big-endian 32-bit words, four words per line, each
 * line prefixed with its byte offset. Trailing all-zero words are not
 * printed, but at least one line is always emitted.
 */
static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
const char *buf, size_t size)
{
__be32 *m = (__be32 *) buf;
int i;
int count = size / sizeof(__be32);
/* Find the last non-zero word. */
for (i = count - 1; i >= 0; i--)
if (m[i])
break;
i++;
count = i ? i : 1;
/* NOTE(review): each row reads m[i] .. m[i + 3], so the last row can read
 * up to three words past 'count'. This looks like it relies on @size being
 * a multiple of 16 bytes - confirm against callers.
 */
for (i = 0; i < count; i += 4)
dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n",
i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]),
be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3]));
}
int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
u32 in_mod, bool out_mbox_direct,
char *in_mbox, size_t in_mbox_size,
@ -1416,17 +1726,35 @@ int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
}
EXPORT_SYMBOL(mlxsw_cmd_exec);
/* Queue @dwork on the mlxsw workqueue to run after @delay jiffies.
 * Returns the result of queue_delayed_work().
 */
int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay)
{
return queue_delayed_work(mlxsw_wq, dwork, delay);
}
EXPORT_SYMBOL(mlxsw_core_schedule_dw);
static int __init mlxsw_core_module_init(void)
{
mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL);
if (!mlxsw_core_dbg_root)
int err;
mlxsw_wq = create_workqueue(mlxsw_core_driver_name);
if (!mlxsw_wq)
return -ENOMEM;
mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL);
if (!mlxsw_core_dbg_root) {
err = -ENOMEM;
goto err_debugfs_create_dir;
}
return 0;
err_debugfs_create_dir:
destroy_workqueue(mlxsw_wq);
return err;
}
/* Module exit: remove the driver's debugfs directory tree and destroy the
 * mlxsw workqueue.
 */
static void __exit mlxsw_core_module_exit(void)
{
debugfs_remove_recursive(mlxsw_core_dbg_root);
destroy_workqueue(mlxsw_wq);
}
module_init(mlxsw_core_module_init);

View file

@ -43,6 +43,7 @@
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <net/devlink.h>
#include "trap.h"
@ -108,6 +109,19 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
const struct mlxsw_event_listener *el,
void *priv);
/* Callback type for register transactions: receives the response payload,
 * its length and the cb_priv value supplied when the transaction was
 * started.
 */
typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload,
size_t payload_len, unsigned long cb_priv);
/* Start a register query transaction and link it on @bulk_list. */
int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
/* Start a register write transaction and link it on @bulk_list. */
int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
/* Wait for all transactions linked on @bulk_list. */
int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list);
int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload);
int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
@ -137,11 +151,22 @@ struct mlxsw_core_port {
struct devlink_port devlink_port;
};
/* Return the driver's private port structure for a core port. The core
 * port is ensured to always be the first field of the driver port
 * structure, so both share the same address.
 */
static inline void *
mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
{
	void *driver_port = mlxsw_core_port;

	return driver_port;
}
int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core,
struct mlxsw_core_port *mlxsw_core_port, u8 local_port,
struct net_device *dev, bool split, u32 split_group);
void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port);
int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8
struct mlxsw_swid_config {
@ -200,6 +225,37 @@ struct mlxsw_driver {
int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port,
unsigned int count);
int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port);
/* Shared buffer (sb) operations. Their signatures parallel the sb_*
* callbacks of struct devlink_ops, taking mlxsw_core / mlxsw_core_port
* where devlink_ops takes devlink / devlink_port.
*/
int (*sb_pool_get)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index,
struct devlink_sb_pool_info *pool_info);
int (*sb_pool_set)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type);
/* Per-port variants operate on a mlxsw_core_port. */
int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 threshold);
int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 *p_pool_index, u32 *p_threshold);
int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 pool_index, u32 threshold);
/* Occupancy operations: snapshot / max clear act on the whole device,
* the two getters return a current and a maximum value per port.
*/
int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_cur, u32 *p_max);
int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u32 *p_cur, u32 *p_max);
void (*txhdr_construct)(struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info);
u8 txhdr_len;

View file

@ -3566,6 +3566,10 @@ MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2);
*/
MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);
/* shared max_buff limits for dynamic threshold for SBCM, SBPM */
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14
/* reg_sbcm_max_buff
* When the pool associated to the port-pg/tclass is configured to
* static, Maximum buffer size for the limiter configured in cells.
@ -3632,6 +3636,27 @@ MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4);
*/
MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2);
/* reg_sbpm_buff_occupancy
* Current buffer occupancy in cells.
* Access: RO
*/
MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24);
/* reg_sbpm_clr
* Clear Max Buffer Occupancy
* When this bit is set, max_buff_occupancy field is cleared (and a
* new max value is tracked from the time the clear was performed).
* Access: OP
*/
MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1);
/* reg_sbpm_max_buff_occupancy
* Maximum value of buffer occupancy in cells monitored. Cleared by
* writing to the clr field.
* Access: RO
*/
MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24);
/* reg_sbpm_min_buff
* Minimum buffer size for the limiter, in cells.
* Access: RW
@ -3652,17 +3677,25 @@ MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24);
MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24);
/* Fill an SBPM register payload.
 * @payload: SBPM payload buffer; MLXSW_REG_ZERO is applied to it first.
 * @local_port, @pool, @dir: stored in the identically named fields.
 * @clr: stored in the clr field, which requests clearing of
 *       max_buff_occupancy when set.
 * @min_buff, @max_buff: stored in the min_buff / max_buff fields.
 *
 * The parameter list declares @dir exactly once; a second, stale
 * declaration of it made the definition invalid.
 */
static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool,
				       enum mlxsw_reg_sbxx_dir dir, bool clr,
				       u32 min_buff, u32 max_buff)
{
	MLXSW_REG_ZERO(sbpm, payload);
	mlxsw_reg_sbpm_local_port_set(payload, local_port);
	mlxsw_reg_sbpm_pool_set(payload, pool);
	mlxsw_reg_sbpm_dir_set(payload, dir);
	mlxsw_reg_sbpm_clr_set(payload, clr);
	mlxsw_reg_sbpm_min_buff_set(payload, min_buff);
	mlxsw_reg_sbpm_max_buff_set(payload, max_buff);
}
/* Extract the occupancy fields of an SBPM payload.
 * @payload: SBPM register payload to read from.
 * @p_buff_occupancy: receives the buff_occupancy field.
 * @p_max_buff_occupancy: receives the max_buff_occupancy field.
 */
static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy,
					 u32 *p_max_buff_occupancy)
{
	u32 occ;

	/* Each field is read and stored before the next one is touched. */
	occ = mlxsw_reg_sbpm_buff_occupancy_get(payload);
	*p_buff_occupancy = occ;

	occ = mlxsw_reg_sbpm_max_buff_occupancy_get(payload);
	*p_max_buff_occupancy = occ;
}
/* SBMM - Shared Buffer Multicast Management Register
* --------------------------------------------------
* The SBMM register configures and retrieves the shared buffer allocation
@ -3718,6 +3751,104 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff,
mlxsw_reg_sbmm_pool_set(payload, pool);
}
/* SBSR - Shared Buffer Status Register
* ------------------------------------
* The SBSR register retrieves the shared buffer occupancy according to
* Port-Pool. Note that this register enables reading a large amount of data.
* It is the user's responsibility to limit the amount of data to ensure the
* response can match the maximum transfer unit. In case the response exceeds
* the maximum transport unit, it will be truncated with no special notice.
*/
/* Register ID of SBSR. */
#define MLXSW_REG_SBSR_ID 0xB005
#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */
#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */
/* Number of records the full payload length below is sized for. */
#define MLXSW_REG_SBSR_REC_MAX_COUNT 120
/* Full payload length: 0x5C + 0x8 * 120 = 1052 (0x41C). */
#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \
MLXSW_REG_SBSR_REC_LEN * \
MLXSW_REG_SBSR_REC_MAX_COUNT)
/* SBSR register descriptor: ID plus the full (base + records) length. */
static const struct mlxsw_reg_info mlxsw_reg_sbsr = {
.id = MLXSW_REG_SBSR_ID,
.len = MLXSW_REG_SBSR_LEN,
};
/* reg_sbsr_clr
* Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy
* field is cleared (and a new max value is tracked from the time the clear
* was performed).
* Access: OP
*/
MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1);
/* reg_sbsr_ingress_port_mask
* Bit vector for all ingress network ports.
* Indicates which of the ports (for which the relevant bit is set)
* are affected by the set operation. Configuration of any other port
* does not change.
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1);
/* reg_sbsr_pg_buff_mask
* Bit vector for all switch priority groups.
* Indicates which of the priorities (for which the relevant bit is set)
* are affected by the set operation. Configuration of any other priority
* does not change.
* Range is 0..cap_max_pg_buffers - 1
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1);
/* reg_sbsr_egress_port_mask
* Bit vector for all egress network ports.
* Indicates which of the ports (for which the relevant bit is set)
* are affected by the set operation. Configuration of any other port
* does not change.
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1);
/* reg_sbsr_tclass_mask
* Bit vector for all traffic classes.
* Indicates which of the traffic classes (for which the relevant bit is
* set) are affected by the set operation. Configuration of any other
* traffic class does not change.
* Range is 0..cap_max_tclass - 1
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1);
/* Prepare an SBSR payload: apply MLXSW_REG_ZERO to it, then store @clr in
* the clr field. None of the port / pg / tclass mask fields is set here,
* so the caller has to set those separately.
*/
static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr)
{
MLXSW_REG_ZERO(sbsr, payload);
mlxsw_reg_sbsr_clr_set(payload, clr);
}
/* reg_sbsr_rec_buff_occupancy
* Current buffer occupancy in cells.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN,
0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false);
/* reg_sbsr_rec_max_buff_occupancy
* Maximum value of buffer occupancy in cells monitored. Cleared by
* writing to the clr field.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN,
0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false);
/* Extract one occupancy record from an SBSR payload.
 * @payload: SBSR register payload to read from.
 * @rec_index: index of the record to read.
 * @p_buff_occupancy: receives the record's buff_occupancy field.
 * @p_max_buff_occupancy: receives the record's max_buff_occupancy field.
 */
static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index,
					     u32 *p_buff_occupancy,
					     u32 *p_max_buff_occupancy)
{
	u32 occ;

	/* Each field is read and stored before the next one is touched. */
	occ = mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index);
	*p_buff_occupancy = occ;

	occ = mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index);
	*p_max_buff_occupancy = occ;
}
static inline const char *mlxsw_reg_id_str(u16 reg_id)
{
switch (reg_id) {
@ -3813,6 +3944,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "SBPM";
case MLXSW_REG_SBMM_ID:
return "SBMM";
case MLXSW_REG_SBSR_ID:
return "SBSR";
default:
return "*UNKNOWN*";
}

View file

@ -2434,6 +2434,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
err_switchdev_init:
err_lag_init:
mlxsw_sp_buffers_fini(mlxsw_sp);
err_buffers_init:
err_flood_init:
mlxsw_sp_traps_fini(mlxsw_sp);
@ -2448,6 +2449,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
@ -2491,16 +2493,26 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
};
static struct mlxsw_driver mlxsw_sp_driver = {
.kind = MLXSW_DEVICE_KIND_SPECTRUM,
.owner = THIS_MODULE,
.priv_size = sizeof(struct mlxsw_sp),
.init = mlxsw_sp_init,
.fini = mlxsw_sp_fini,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.txhdr_construct = mlxsw_sp_txhdr_construct,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp_config_profile,
.kind = MLXSW_DEVICE_KIND_SPECTRUM,
.owner = THIS_MODULE,
.priv_size = sizeof(struct mlxsw_sp),
.init = mlxsw_sp_init,
.fini = mlxsw_sp_fini,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.sb_pool_get = mlxsw_sp_sb_pool_get,
.sb_pool_set = mlxsw_sp_sb_pool_set,
.sb_port_pool_get = mlxsw_sp_sb_port_pool_get,
.sb_port_pool_set = mlxsw_sp_sb_port_pool_set,
.sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get,
.sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set,
.sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot,
.sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
.txhdr_construct = mlxsw_sp_txhdr_construct,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp_config_profile,
};
static int

View file

@ -65,6 +65,7 @@
/* Size of one shared buffer cell, in bytes. */
#define MLXSW_SP_BYTES_PER_CELL 96
/* Convert a byte count to cells via DIV_ROUND_UP (partial cells count as
 * a whole cell).
 */
#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL)
/* Convert a cell count to bytes. The argument is parenthesized so that a
 * compound expression such as (a + b) is multiplied as a whole.
 */
#define MLXSW_SP_CELLS_TO_BYTES(c) ((c) * MLXSW_SP_BYTES_PER_CELL)
/* Maximum delay buffer needed in case of PAUSE frames, in cells.
* Assumes 100m cable and maximum MTU.
@ -117,6 +118,40 @@ static inline bool mlxsw_sp_fid_is_vfid(u16 fid)
return fid >= MLXSW_SP_VFID_BASE;
}
/* Per-pool record kept in struct mlxsw_sp_sb: a pool mode and a size.
* NOTE(review): presumably caches what was written to the SBPR register
* (the mode type is enum mlxsw_reg_sbpr_mode); units of size are not
* visible here - confirm.
*/
struct mlxsw_sp_sb_pr {
enum mlxsw_reg_sbpr_mode mode;
u32 size;
};
/* Occupancy pair: a current and a maximum value.
* NOTE(review): the "cp" in the tag looks like a typo for the "sp" prefix
* used by the sibling structures; renaming requires updating every user.
*/
struct mlxsw_cp_sb_occ {
u32 cur;
u32 max;
};
/* Per-port, per-traffic-class record kept in struct mlxsw_sp_sb: buffer
* limits, the bound pool and an occupancy pair.
* NOTE(review): presumably mirrors the SBCM register fields - confirm.
*/
struct mlxsw_sp_sb_cm {
u32 min_buff;
u32 max_buff;
u8 pool;
struct mlxsw_cp_sb_occ occ;
};
/* Per-port, per-pool record kept in struct mlxsw_sp_sb: buffer limits and
* an occupancy pair.
* NOTE(review): presumably mirrors the SBPM register fields - confirm.
*/
struct mlxsw_sp_sb_pm {
u32 min_buff;
u32 max_buff;
struct mlxsw_cp_sb_occ occ;
};
/* Dimensions of the per-pool and per-traffic-class tables below. */
#define MLXSW_SP_SB_POOL_COUNT 4
#define MLXSW_SP_SB_TC_COUNT 8
/* Shared buffer state embedded in struct mlxsw_sp: pool records plus, for
* each of MLXSW_PORT_MAX_PORTS ports, per-TC and per-pool records.
* NOTE(review): the leading dimension of size 2 presumably selects the
* direction (ingress / egress) - confirm against the users.
*/
struct mlxsw_sp_sb {
struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT];
struct {
struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
} ports[MLXSW_PORT_MAX_PORTS];
};
struct mlxsw_sp {
struct {
struct list_head list;
@ -147,6 +182,7 @@ struct mlxsw_sp {
struct mlxsw_sp_upper master_bridge;
struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
u8 port_to_module[MLXSW_PORT_MAX_PORTS];
struct mlxsw_sp_sb sb;
};
static inline struct mlxsw_sp_upper *
@ -277,7 +313,39 @@ enum mlxsw_sp_flood_table {
};
int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port);
/* Shared buffer handlers. These are the functions assigned to the sb_*
* members of mlxsw_sp_driver, so each prototype matches the corresponding
* struct mlxsw_driver callback.
*/
int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index,
struct devlink_sb_pool_info *pool_info);
int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type);
int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 threshold);
int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 *p_pool_index, u32 *p_threshold);
int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 pool_index, u32 threshold);
/* Occupancy handlers: device-wide snapshot / max clear, then per-port
* getters returning a current and a maximum value.
*/
int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_cur, u32 *p_max);
int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u32 *p_cur, u32 *p_max);
int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp);

File diff suppressed because it is too large Load diff

View file

@ -1430,8 +1430,8 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
/* Arm the FDB notification delayed work to run after
 * fdb_notify.interval milliseconds.
 */
static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long delay = msecs_to_jiffies(mlxsw_sp->fdb_notify.interval);

	/* Queue the work exactly once, through the mlxsw core helper.
	 * Also calling schedule_delayed_work() on the same dwork would
	 * queue it a second time through a different entry point.
	 */
	mlxsw_core_schedule_dw(&mlxsw_sp->fdb_notify.dw, delay);
}
static void mlxsw_sp_fdb_notify_work(struct work_struct *work)

View file

@ -24,6 +24,7 @@ struct devlink_ops;
struct devlink {
struct list_head list;
struct list_head port_list;
struct list_head sb_list;
const struct devlink_ops *ops;
struct device *dev;
possible_net_t _net;
@ -42,6 +43,12 @@ struct devlink_port {
u32 split_group;
};
/* Pool attributes filled in by the sb_pool_get callback of struct
* devlink_ops: pool type, pool size and threshold type.
*/
struct devlink_sb_pool_info {
enum devlink_sb_pool_type pool_type;
u32 size;
enum devlink_sb_threshold_type threshold_type;
};
struct devlink_ops {
size_t priv_size;
int (*port_type_set)(struct devlink_port *devlink_port,
@ -49,6 +56,40 @@ struct devlink_ops {
int (*port_split)(struct devlink *devlink, unsigned int port_index,
unsigned int count);
int (*port_unsplit)(struct devlink *devlink, unsigned int port_index);
/* Shared buffer (sb) callbacks. Device-wide ones take a struct devlink,
* per-port ones take a struct devlink_port; all are addressed by sb_index.
*/
int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index,
struct devlink_sb_pool_info *pool_info);
int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type);
int (*sb_port_pool_get)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int (*sb_port_pool_set)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 threshold);
int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port,
unsigned int sb_index,
u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 *p_pool_index, u32 *p_threshold);
int (*sb_tc_pool_bind_set)(struct devlink_port *devlink_port,
unsigned int sb_index,
u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 pool_index, u32 threshold);
/* Occupancy callbacks: snapshot / max clear act on the device, the two
* getters return a current and a maximum value through p_cur / p_max.
*/
int (*sb_occ_snapshot)(struct devlink *devlink,
unsigned int sb_index);
int (*sb_occ_max_clear)(struct devlink *devlink,
unsigned int sb_index);
int (*sb_occ_port_pool_get)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 *p_cur, u32 *p_max);
int (*sb_occ_tc_port_bind_get)(struct devlink_port *devlink_port,
unsigned int sb_index,
u16 tc_index,
enum devlink_sb_pool_type pool_type,
u32 *p_cur, u32 *p_max);
};
static inline void *devlink_priv(struct devlink *devlink)
@ -82,6 +123,11 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
void devlink_port_type_clear(struct devlink_port *devlink_port);
void devlink_port_split_set(struct devlink_port *devlink_port,
u32 split_group);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
u16 egress_tc_count);
void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
#else
@ -135,6 +181,19 @@ static inline void devlink_port_split_set(struct devlink_port *devlink_port,
{
}
/* No-op stand-in for devlink_sb_register(), used in the #else branch of
 * this header. Always reports success.
 *
 * The parameter list matches the real prototype, which takes separate
 * ingress and egress TC counts; a stub with a single tc_count breaks the
 * build of callers whenever this branch is selected.
 */
static inline int devlink_sb_register(struct devlink *devlink,
				      unsigned int sb_index, u32 size,
				      u16 ingress_pools_count,
				      u16 egress_pools_count,
				      u16 ingress_tc_count,
				      u16 egress_tc_count)
{
	return 0;
}
/* No-op stand-in for devlink_sb_unregister(), used in the #else branch of
* this header. Same parameters as the real prototype.
*/
static inline void devlink_sb_unregister(struct devlink *devlink,
unsigned int sb_index)
{
}
#endif
#endif /* _NET_DEVLINK_H_ */

View file

@ -33,6 +33,30 @@ enum devlink_command {
DEVLINK_CMD_PORT_SPLIT,
DEVLINK_CMD_PORT_UNSPLIT,
DEVLINK_CMD_SB_GET, /* can dump */
DEVLINK_CMD_SB_SET,
DEVLINK_CMD_SB_NEW,
DEVLINK_CMD_SB_DEL,
DEVLINK_CMD_SB_POOL_GET, /* can dump */
DEVLINK_CMD_SB_POOL_SET,
DEVLINK_CMD_SB_POOL_NEW,
DEVLINK_CMD_SB_POOL_DEL,
DEVLINK_CMD_SB_PORT_POOL_GET, /* can dump */
DEVLINK_CMD_SB_PORT_POOL_SET,
DEVLINK_CMD_SB_PORT_POOL_NEW,
DEVLINK_CMD_SB_PORT_POOL_DEL,
DEVLINK_CMD_SB_TC_POOL_BIND_GET, /* can dump */
DEVLINK_CMD_SB_TC_POOL_BIND_SET,
DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
DEVLINK_CMD_SB_TC_POOL_BIND_DEL,
/* Shared buffer occupancy monitoring commands */
DEVLINK_CMD_SB_OCC_SNAPSHOT,
DEVLINK_CMD_SB_OCC_MAX_CLEAR,
/* add new commands above here */
__DEVLINK_CMD_MAX,
@ -46,6 +70,31 @@ enum devlink_port_type {
DEVLINK_PORT_TYPE_IB,
};
/* Direction a shared buffer pool belongs to: ingress or egress. */
enum devlink_sb_pool_type {
DEVLINK_SB_POOL_TYPE_INGRESS,
DEVLINK_SB_POOL_TYPE_EGRESS,
};
/* static threshold - limits the maximum number of bytes.
* dynamic threshold - limits the maximum number of bytes
* based on the currently available free space in the shared buffer pool.
* In this mode, the maximum quota is calculated with
* the following formula:
* max_quota = alpha / (1 + alpha) * Free_Buffer
* where Free_Buffer is the amount of unoccupied buffer associated with
* the relevant pool.
* The value that can be passed is in the range 0-20 and is used
* to compute alpha with the following formula:
* alpha = 2 ^ (passed_value - 10)
*/
enum devlink_sb_threshold_type {
DEVLINK_SB_THRESHOLD_TYPE_STATIC,
DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC,
};
/* Upper end of the 0-20 value range described above. */
#define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
enum devlink_attr {
/* don't change the order or add anything between, this is ABI! */
DEVLINK_ATTR_UNSPEC,
@ -62,6 +111,20 @@ enum devlink_attr {
DEVLINK_ATTR_PORT_IBDEV_NAME, /* string */
DEVLINK_ATTR_PORT_SPLIT_COUNT, /* u32 */
DEVLINK_ATTR_PORT_SPLIT_GROUP, /* u32 */
DEVLINK_ATTR_SB_INDEX, /* u32 */
DEVLINK_ATTR_SB_SIZE, /* u32 */
DEVLINK_ATTR_SB_INGRESS_POOL_COUNT, /* u16 */
DEVLINK_ATTR_SB_EGRESS_POOL_COUNT, /* u16 */
DEVLINK_ATTR_SB_INGRESS_TC_COUNT, /* u16 */
DEVLINK_ATTR_SB_EGRESS_TC_COUNT, /* u16 */
DEVLINK_ATTR_SB_POOL_INDEX, /* u16 */
DEVLINK_ATTR_SB_POOL_TYPE, /* u8 */
DEVLINK_ATTR_SB_POOL_SIZE, /* u32 */
DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, /* u8 */
DEVLINK_ATTR_SB_THRESHOLD, /* u32 */
DEVLINK_ATTR_SB_TC_INDEX, /* u16 */
DEVLINK_ATTR_SB_OCC_CUR, /* u32 */
DEVLINK_ATTR_SB_OCC_MAX, /* u32 */
/* add new attributes above here, update the policy in devlink.c */

File diff suppressed because it is too large Load diff