diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 38e94ed65936..29ad304e6fba 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -17,6 +17,7 @@ Parameters - Validation * - ``enable_roce`` - driverinit + - Type: Boolean * - ``io_eq_size`` - driverinit - The range is between 64 and 4096. diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91eb615b89ee..86842cd580ba 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1541,16 +1541,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, .nent = MLX5_IB_NUM_PF_EQE, }; param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT; - if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) { - err = -ENOMEM; - goto err_wq; - } eq->core = mlx5_eq_create_generic(dev->mdev, &param); - free_cpumask_var(param.affinity); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 33904bc87efa..fcfd38fa9e6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -109,7 +109,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o # # SF device # -mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o # # SF manager diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c5f959a9e14b..812e6810cb3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -984,7 +984,7 @@ struct mlx5e_profile { }; #define mlx5e_profile_feature_cap(profile, feature) \ - ((profile)->features & (MLX5E_PROFILE_FEATURE_## feature)) + ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature)) void mlx5e_build_ptys2ethtool_map(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index 074ffa4fa5af..b4f3bd7d346e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) cancel_delayed_work_sync(&priv->stats_agent.work); } -int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) { int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); struct mlx5_hv_vhca_agent *agent; priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); if (!priv->stats_agent.buf) - return -ENOMEM; + return; agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, MLX5_HV_VHCA_AGENT_STATS, @@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) PTR_ERR(agent)); kvfree(priv->stats_agent.buf); - return IS_ERR_OR_NULL(agent); + return; } priv->stats_agent.agent = agent; INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); - - return 0; } void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h index 
664463faf77b..29c8c6d3260f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -7,19 +7,12 @@ #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) -int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); #else - -static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) -{ - return 0; -} - -static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) -{ -} +static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {} +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {} #endif #endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index a0832b86016c..c614fc7fdc9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -45,14 +45,10 @@ verify_uplink_forwarding(struct mlx5e_priv *priv, termination_table_raw_traffic)) { NL_SET_ERR_MSG_MOD(extack, "devices are both uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink, can't offload forwarding\n", - priv->netdev->name, out_dev->name); return -EOPNOTSUPP; } else if (out_dev != rep_priv->netdev) { NL_SET_ERR_MSG_MOD(extack, "devices are not the same uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", - priv->netdev->name, out_dev->name); return -EOPNOTSUPP; } return 0; @@ -160,10 +156,6 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, } NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); - netdev_warn(priv->netdev, - "devices %s %s not on same switch HW, can't offload forwarding\n", - netdev_name(priv->netdev), - out_dev->name); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 536fcb2c5e90..57d755db1cf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1883,24 +1883,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_params new_params; - bool mode_changed; u8 cq_period_mode, current_cq_period_mode; + struct mlx5e_params new_params; + + if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) + return -EOPNOTSUPP; + + cq_period_mode = cqe_mode_to_period_mode(enable); - cq_period_mode = enable ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; current_cq_period_mode = is_rx_cq ? 
priv->channels.params.rx_cq_moderation.cq_period_mode : priv->channels.params.tx_cq_moderation.cq_period_mode; - mode_changed = cq_period_mode != current_cq_period_mode; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && - !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) - return -EOPNOTSUPP; - - if (!mode_changed) + if (cq_period_mode == current_cq_period_mode) return 0; new_params = priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 31c911182498..d36489363a26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3605,11 +3605,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) new_params = priv->channels.params; if (enable) { - if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n"); - err = -EINVAL; - goto out; - } new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; new_params.packet_merge.shampo.match_criteria_type = MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; @@ -3871,6 +3866,11 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); + features &= ~NETIF_F_GRO_HW; + } } if (mlx5e_is_uplink_rep(priv)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f09b57c31ed7..96e260fd7987 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1603,6 +1603,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) } } +static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + trigger_report(rq, cqe); + rq->stats->wqe_err++; +} + static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -1616,8 +1622,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto free_wqe; } @@ -1670,7 +1675,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto free_wqe; } @@ -1719,8 +1724,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -1988,8 +1992,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -2058,8 +2061,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + 
mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 73fcd9fb17dd..7e7c0c1019f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -35,6 +35,7 @@ #include "en_accel/tls.h" #include "en_accel/en_accel.h" #include "en/ptp.h" +#include "en/port.h" static unsigned int stats_grps_num(struct mlx5e_priv *priv) { @@ -1158,12 +1159,99 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } -void mlx5e_stats_fec_get(struct mlx5e_priv *priv, - struct ethtool_fec_stats *fec_stats) +static int fec_num_lanes(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return 0; + + return MLX5_GET(pmlp_reg, out, width); +} + +static int fec_active_mode(struct mlx5_core_dev *mdev) +{ + unsigned long fec_active_long; + u32 fec_active; + + if (mlx5e_get_fec_mode(mdev, &fec_active, NULL)) + return MLX5E_FEC_NOFEC; + + fec_active_long = fec_active; + return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE); +} + +#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \ + fec_stats->corrected_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_corrected_blocks_lane##idx); \ + fec_stats->uncorrectable_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_uncorrectable_blocks_lane##idx); \ +}) + +static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats, + u32 *ppcnt, u8 lanes) +{ + if (lanes > 3) { /* 4 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(3); + MLX5E_STATS_SET_FEC_BLOCK(2); + } + if (lanes > 1) /* 2 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(1); + if (lanes > 0) /* 1 lane */ + MLX5E_STATS_SET_FEC_BLOCK(0); +} + +static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt) +{ + fec_stats->corrected_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_corrected_blocks); + fec_stats->uncorrectable_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_uncorrectable_blocks); +} + +static void fec_set_block_stats(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int mode = fec_active_mode(mdev); + + if (mode == MLX5E_FEC_NOFEC) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0)) + return; + + switch (mode) { + case MLX5E_FEC_RS_528_514: + case MLX5E_FEC_RS_544_514: + case MLX5E_FEC_LLRS_272_257_1: + fec_set_rs_stats(fec_stats, out); + return; + case MLX5E_FEC_FIRECODE: + fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev)); + } +} + +static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) { u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); if (!MLX5_CAP_PCAM_FEATURE(mdev, 
ppcnt_statistical_group)) @@ -1181,6 +1269,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, phy_corrected_bits); } +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, fec_stats); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index b695aad71ee1..48a45aa54a3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -59,6 +59,8 @@ struct mlx5_eq_table { struct mutex lock; /* sync async eqs creations */ int num_comp_eqs; struct mlx5_irq_table *irq_table; + struct mlx5_irq **comp_irqs; + struct mlx5_irq *ctrl_irq; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif @@ -266,8 +268,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; - u16 vecidx = param->irq_index; __be64 *pas; + u16 vecidx; void *eqc; int inlen; u32 *in; @@ -289,20 +291,16 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq); - eq->irq = mlx5_irq_request(dev, vecidx, param->affinity); - if (IS_ERR(eq->irq)) { - err = PTR_ERR(eq->irq); - goto err_buf; - } - + eq->irq = param->irq; vecidx = mlx5_irq_get_index(eq->irq); + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; - goto err_irq; + goto err_buf; } pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); @@ -346,8 +344,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, err_in: kvfree(in); -err_irq: - mlx5_irq_release(eq->irq); err_buf: mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@ -401,7 +397,6 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - mlx5_irq_release(eq->irq); mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@ -594,11 +589,8 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, eq->irq_nb.notifier_call = mlx5_eq_async_int; spin_lock_init(&eq->lock); - if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL)) - return -ENOMEM; err = create_async_eq(dev, &eq->core, param); - free_cpumask_var(param->affinity); if (err) { mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); return err; @@ -643,11 +635,18 @@ static int create_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_param param = {}; int err; + /* All the async_eqs are using single IRQ, request one IRQ and share its + * index among all the async_eqs of this device. 
+ */ + table->ctrl_irq = mlx5_ctrl_irq_request(dev); + if (IS_ERR(table->ctrl_irq)) + return PTR_ERR(table->ctrl_irq); + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; @@ -660,7 +659,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = async_eq_depth_devlink_param_get(dev), }; @@ -670,7 +669,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) goto err2; param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = /* TODO: sriov max_vf + */ 1, .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; @@ -689,6 +688,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) err1: mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); return err; } @@ -703,6 +703,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); } struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) @@ -730,12 +731,10 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); int err; - if (!cpumask_available(param->affinity)) - return ERR_PTR(-EINVAL); - if (!eq) return ERR_PTR(-ENOMEM); + param->irq = dev->priv.eq_table->ctrl_irq; err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); @@ -795,6 +794,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); +static void comp_irqs_release(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + if (mlx5_core_is_sf(dev)) + mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); + else + mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); + kfree(table->comp_irqs); +} + +static int comp_irqs_request(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + int ncomp_eqs = table->num_comp_eqs; + u16 *cpus; + int ret; + int i; + + ncomp_eqs = table->num_comp_eqs; + table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); + if (!table->comp_irqs) + return -ENOMEM; + if (mlx5_core_is_sf(dev)) { + ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); + if (ret < 0) + goto free_irqs; + return ret; + } + + cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); + if (!cpus) { + ret = -ENOMEM; + goto free_irqs; + } + for (i = 0; i < ncomp_eqs; i++) + cpus[i] = cpumask_local_spread(i, dev->priv.numa_node); + ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); + kfree(cpus); + if (ret < 0) + goto free_irqs; + return ret; + +free_irqs: + kfree(table->comp_irqs); + return ret; +} + static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -809,6 +856,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) tasklet_disable(&eq->tasklet_ctx.task); kfree(eq); } + comp_irqs_release(dev); } static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) 
@@ -835,12 +883,13 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) int err; int i; + ncomp_eqs = comp_irqs_request(dev); + if (ncomp_eqs < 0) + return ncomp_eqs; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_eqs = table->num_comp_eqs; nent = comp_eq_depth_devlink_param_get(dev); for (i = 0; i < ncomp_eqs; i++) { struct mlx5_eq_param param = {}; - int vecidx = i; eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -855,18 +904,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .irq_index = vecidx, + .irq = table->comp_irqs[i], .nent = nent, }; - if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) { - err = -ENOMEM; - goto clean_eq; - } - cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node), - param.affinity); err = create_map_eq(dev, &eq->core, &param); - free_cpumask_var(param.affinity); if (err) goto clean_eq; err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); @@ -880,7 +922,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) list_add_tail(&eq->list, &table->comp_eqs_list); } + table->num_comp_eqs = ncomp_eqs; return 0; + clean_eq: kfree(eq); clean: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 2b52f7c09152..9d17206d1625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -431,7 +431,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int err = 0; if (!mlx5_esw_allowed(esw)) - return -EPERM; + return vlan ? -EPERM : 0; if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c new file mode 100644 index 000000000000..380a208ab137 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" + +static void cpu_put(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]--; +} + +static void cpu_get(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]++; +} + +/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */ +static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask) +{ + int best_cpu = -1; + int cpu; + + for_each_cpu_and(cpu, req_mask, cpu_online_mask) { + /* CPU has zero IRQs on it. No need to search any more CPUs. 
*/ + if (!pool->irqs_per_cpu[cpu]) { + best_cpu = cpu; + break; + } + if (best_cpu < 0) + best_cpu = cpu; + if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu]) + best_cpu = cpu; + } + if (best_cpu == -1) { + /* There are no online CPUs in req_mask */ + mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n", + cpumask_pr_args(req_mask)); + best_cpu = cpumask_first(cpu_online_mask); + } + pool->irqs_per_cpu[best_cpu]++; + return best_cpu; +} + +/* Creating an IRQ from irq_pool */ +static struct mlx5_irq * +irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + cpumask_var_t auto_mask; + struct mlx5_irq *irq; + u32 irq_index; + int err; + + if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); + if (err) + return ERR_PTR(err); + if (pool->irqs_per_cpu) { + if (cpumask_weight(req_mask) > 1) + /* if req_mask contains more than one CPU, set the least loaded CPU + * of req_mask + */ + cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask); + else + cpu_get(pool, cpumask_first(req_mask)); + } + irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask); + free_cpumask_var(auto_mask); + return irq; +} + +/* Looking for the IRQ with the smallest refcount that fits req_mask. + * If pool is sf_comp_pool, then we are looking for an IRQ with any of the + * requested CPUs in req_mask. + * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask + * isn't a subset of req_mask, so we will skip it. irq1_mask is a subset of req_mask, + * so we don't skip it. + * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will + * fit. And since a mask is a subset of itself, we will pass the first if below. + */ +static struct mlx5_irq * +irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + int start = pool->xa_num_irqs.min; + int end = pool->xa_num_irqs.max; + struct mlx5_irq *irq = NULL; + struct mlx5_irq *iter; + int irq_refcount = 0; + unsigned long index; + + lockdep_assert_held(&pool->lock); + xa_for_each_range(&pool->irqs, index, iter, start, end) { + struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter); + int iter_refcount = mlx5_irq_read_locked(iter); + + if (!cpumask_subset(iter_mask, req_mask)) + /* skip IRQs with a mask which is not a subset of req_mask */ + continue; + if (iter_refcount < pool->min_threshold) + /* If we found an IRQ with less than min_thres, return it */ + return iter; + if (!irq || iter_refcount < irq_refcount) { + /* In case we won't find an IRQ with less than min_thres, + * keep a pointer to the least used IRQ + */ + irq_refcount = iter_refcount; + irq = iter; + } + } + return irq; +} + +/** + * mlx5_irq_affinity_request - request an IRQ according to the given mask. + * @pool: IRQ pool to request from. + * @req_mask: cpumask requested for this IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. 
+ */ +struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + struct mlx5_irq *least_loaded_irq, *new_irq; + + mutex_lock(&pool->lock); + least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask); + if (least_loaded_irq && + mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) + goto out; + /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ + new_irq = irq_pool_request_irq(pool, req_mask); + if (IS_ERR(new_irq)) { + if (!least_loaded_irq) { + /* We failed to create an IRQ and we didn't find an IRQ */ + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); + mutex_unlock(&pool->lock); + return new_irq; + } + /* We failed to create a new IRQ for the requested affinity, + * sharing existing IRQ. + */ + goto out; + } + least_loaded_irq = new_irq; + goto unlock; +out: + mlx5_irq_get_locked(least_loaded_irq); + if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold) + mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", + pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(least_loaded_irq)), pool->name, + mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ); +unlock: + mutex_unlock(&pool->lock); + return least_loaded_irq; +} + +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + int i; + + for (i = 0; i < num_irqs; i++) { + int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i])); + + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irqs[i]))); + if (mlx5_irq_put(irqs[i])) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); + } +} + +/** + * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQ pointers. + * + * Each IRQ is bound to at most 1 CPU. + * This function is requesting IRQs according to the default assignment. + * The default assignment policy is: + * - in each iteration, request the least loaded IRQ which is not bound to any + * CPU of the previous IRQs requested. + * + * This function returns the number of IRQs requested (which might be smaller than + * @nirqs) if successful, or a negative error code in case of an error. + */ +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i = 0; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_copy(req_mask, cpu_online_mask); + for (i = 0; i < nirqs; i++) { + if (mlx5_irq_pool_is_sf_pool(pool)) + irq = mlx5_irq_affinity_request(pool, req_mask); + else + /* In case the SF pool doesn't exist, fall back to the PF IRQs. + * The PF IRQs are already allocated and bound to a CPU + * at this point. Hence, only an index is needed. 
+ */ + irq = mlx5_irq_request(dev, i, NULL); + if (IS_ERR(irq)) + break; + irqs[i] = irq; + cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + } + free_cpumask_var(req_mask); + if (!i) + return PTR_ERR(irq); + return i; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6b225bb5751a..2c774f367199 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -98,6 +98,8 @@ enum { MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, }; +#define LOG_MAX_SUPPORTED_QPS 0xff + static struct mlx5_profile profile[] = { [0] = { .mask = 0, @@ -109,7 +111,7 @@ static struct mlx5_profile profile[] = { [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 18, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, .mr_cache[0] = { .size = 500, .limit = 250 @@ -523,7 +525,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { + if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 8116815663a7..23cb63fa4588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -22,12 +22,40 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, int msix_vec_count); int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct cpumask *affinity); -void mlx5_irq_release(struct mlx5_irq *irq); +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs); +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); +struct mlx5_irq_pool; +#ifdef CONFIG_MLX5_SF +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs); +struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask); +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs); +#else +static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + return -EOPNOTSUPP; +} + +static inline struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + return ERR_PTR(-EOPNOTSUPP); 
+} + +static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, + struct mlx5_irq **irqs, int num_irqs) {} +#endif #endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 163e01fe500e..90fec0649ef5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -7,15 +7,12 @@ #include #include "mlx5_core.h" #include "mlx5_irq.h" +#include "pci_irq.h" #include "lib/sf.h" #ifdef CONFIG_RFS_ACCEL #include #endif -#define MLX5_MAX_IRQ_NAME (32) -/* max irq_index is 2047, so four chars */ -#define MLX5_MAX_IRQ_IDX_CHARS (4) - #define MLX5_SFS_PER_CTRL_IRQ 64 #define MLX5_IRQ_CTRL_SF_MAX 8 /* min num of vectors for SFs to be enabled */ @@ -25,7 +22,6 @@ #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX) #define MLX5_EQ_SHARE_IRQ_MIN_COMP (1) #define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4) -#define MLX5_EQ_REFS_PER_IRQ (2) struct mlx5_irq { struct atomic_notifier_head nh; @@ -37,16 +33,6 @@ struct mlx5_irq { int irqn; }; -struct mlx5_irq_pool { - char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; - struct xa_limit xa_num_irqs; - struct mutex lock; /* sync IRQs creations */ - struct xarray irqs; - u32 max_threshold; - u32 min_threshold; - struct mlx5_core_dev *dev; -}; - struct mlx5_irq_table { struct mlx5_irq_pool *pf_pool; struct mlx5_irq_pool *sf_ctrl_pool; @@ -153,18 +139,28 @@ static void irq_release(struct mlx5_irq *irq) kfree(irq); } -static void irq_put(struct mlx5_irq *irq) +int mlx5_irq_put(struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = irq->pool; + int ret = 0; mutex_lock(&pool->lock); irq->refcount--; - if (!irq->refcount) + if (!irq->refcount) { irq_release(irq); + ret = 1; + } mutex_unlock(&pool->lock); + return ret; } -static int irq_get_locked(struct mlx5_irq *irq) +int mlx5_irq_read_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + return irq->refcount; +} + +int mlx5_irq_get_locked(struct mlx5_irq *irq) { lockdep_assert_held(&irq->pool->lock); if (WARN_ON_ONCE(!irq->refcount)) @@ -178,7 +174,7 @@ static int irq_get(struct mlx5_irq *irq) int err; mutex_lock(&irq->pool->lock); - err = irq_get_locked(irq); + err = mlx5_irq_get_locked(irq); mutex_unlock(&irq->pool->lock); return err; } @@ -210,12 +206,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); } -static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) -{ - return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); -} - -static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity) { struct mlx5_core_dev *dev = pool->dev; char name[MLX5_MAX_IRQ_NAME]; @@ -226,7 +218,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) if (!irq) return ERR_PTR(-ENOMEM); irq->irqn = pci_irq_vector(dev->pdev, i); - if (!irq_pool_is_sf_pool(pool)) + if (!mlx5_irq_pool_is_sf_pool(pool)) irq_set_name(pool, name, i); else irq_sf_set_name(pool, name, i); @@ -244,6 +236,10 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) err = -ENOMEM; goto err_cpumask; } + if (affinity) { + cpumask_copy(irq->mask, affinity); + irq_set_affinity_hint(irq->irqn, irq->mask); + } irq->pool = pool; irq->refcount = 1; irq->index = i; @@ -255,6 +251,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) } return irq; 
err_xa: + irq_set_affinity_hint(irq->irqn, NULL); free_cpumask_var(irq->mask); err_cpumask: free_irq(irq->irqn, &irq->nh); @@ -275,7 +272,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) return -ENOENT; ret = atomic_notifier_chain_register(&irq->nh, nb); if (ret) - irq_put(irq); + mlx5_irq_put(irq); return ret; } @@ -284,7 +281,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) int err = 0; err = atomic_notifier_chain_unregister(&irq->nh, nb); - irq_put(irq); + mlx5_irq_put(irq); return err; } @@ -300,85 +297,6 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) /* irq_pool API */ -/* creating an irq from irq_pool */ -static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool, - struct cpumask *affinity) -{ - struct mlx5_irq *irq; - u32 irq_index; - int err; - - err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, - GFP_KERNEL); - if (err) - return ERR_PTR(err); - irq = irq_request(pool, irq_index); - if (IS_ERR(irq)) - return irq; - cpumask_copy(irq->mask, affinity); - irq_set_affinity_hint(irq->irqn, irq->mask); - return irq; -} - -/* looking for the irq with the smallest refcount and the same affinity */ -static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, - struct cpumask *affinity) -{ - int start = pool->xa_num_irqs.min; - int end = pool->xa_num_irqs.max; - struct mlx5_irq *irq = NULL; - struct mlx5_irq *iter; - unsigned long index; - - lockdep_assert_held(&pool->lock); - xa_for_each_range(&pool->irqs, index, iter, start, end) { - if (!cpumask_equal(iter->mask, affinity)) - continue; - if (iter->refcount < pool->min_threshold) - return iter; - if (!irq || iter->refcount < irq->refcount) - irq = iter; - } - return irq; -} - -/* requesting an irq from a given pool according to given affinity */ -static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, - struct cpumask *affinity) -{ - struct mlx5_irq *least_loaded_irq, *new_irq; - - mutex_lock(&pool->lock); - least_loaded_irq = irq_pool_find_least_loaded(pool, affinity); - if (least_loaded_irq && - least_loaded_irq->refcount < pool->min_threshold) - goto out; - new_irq = irq_pool_create_irq(pool, affinity); - if (IS_ERR(new_irq)) { - if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", - PTR_ERR(new_irq)); - mutex_unlock(&pool->lock); - return new_irq; - } - /* We failed to create a new IRQ for the requested affinity, - * sharing existing IRQ. 
- */ - goto out; - } - least_loaded_irq = new_irq; - goto unlock; -out: - irq_get_locked(least_loaded_irq); - if (least_loaded_irq->refcount > pool->max_threshold) - mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", - least_loaded_irq->irqn, pool->name, - least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ); -unlock: - mutex_unlock(&pool->lock); - return least_loaded_irq; -} - /* requesting an irq from a given pool according to given index */ static struct mlx5_irq * irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, @@ -389,42 +307,111 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, mutex_lock(&pool->lock); irq = xa_load(&pool->irqs, vecidx); if (irq) { - irq_get_locked(irq); + mlx5_irq_get_locked(irq); goto unlock; } - irq = irq_request(pool, vecidx); - if (IS_ERR(irq) || !affinity) - goto unlock; - cpumask_copy(irq->mask, affinity); - if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && - cpumask_empty(irq->mask)) - cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask); - irq_set_affinity_hint(irq->irqn, irq->mask); + irq = mlx5_irq_alloc(pool, vecidx, affinity); unlock: mutex_unlock(&pool->lock); return irq; } -static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table, - int i, struct cpumask *affinity) +static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_ctrl_pool; +} + +static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) { - if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL) - return irq_table->sf_ctrl_pool; return irq_table->sf_comp_pool; } -/** - * mlx5_irq_release - release an IRQ back to the system. - * @irq: irq to be released. - */ -void mlx5_irq_release(struct mlx5_irq *irq) +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) { - synchronize_irq(irq->irqn); - irq_put(irq); + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) + pool = sf_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; +} + +static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) + pool = sf_ctrl_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; } /** - * mlx5_irq_request - request an IRQ for mlx5 device. + * mlx5_irqs_release - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) +{ + int i; + + for (i = 0; i < nirqs; i++) { + synchronize_irq(irqs[i]->irqn); + mlx5_irq_put(irqs[i]); + } +} + +/** + * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system. + * @ctrl_irq: ctrl IRQ to be released. + */ +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) +{ + mlx5_irqs_release(&ctrl_irq, 1); +} + +/** + * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device. + * @dev: mlx5 device that requesting the IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. 
+ */ +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + cpumask_copy(req_mask, cpu_online_mask); + if (!mlx5_irq_pool_is_sf_pool(pool)) { + /* In case we are allocating a control IRQ for PF/VF */ + if (!pool->xa_num_irqs.max) { + cpumask_clear(req_mask); + /* In case we only have a single IRQ for PF/VF */ + cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask); + } + /* Allocate the IRQ in the last index of the pool */ + irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask); + } else { + irq = mlx5_irq_affinity_request(pool, req_mask); + } + + free_cpumask_var(req_mask); + return irq; +} + +/** - * mlx5_irq_request - request an IRQ for mlx5 device. + * mlx5_irq_request - request an IRQ for mlx5 PF/VF device. * @dev: mlx5 device that requesting the IRQ. * @vecidx: vector index of the IRQ. This argument is ignore if affinity is * provided. @@ -439,23 +426,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct mlx5_irq_pool *pool; struct mlx5_irq *irq; - if (mlx5_core_is_sf(dev)) { - pool = find_sf_irq_pool(irq_table, vecidx, affinity); - if (!pool) - /* we don't have IRQs for SFs, using the PF IRQs */ - goto pf_irq; - if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp")) - /* In case an SF user request IRQ with vecidx */ - irq = irq_pool_request_vector(pool, vecidx, NULL); - else - irq = irq_pool_request_affinity(pool, affinity); - goto out; - } -pf_irq: pool = irq_table->pf_pool; - vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx; irq = irq_pool_request_vector(pool, vecidx, affinity); -out: if (IS_ERR(irq)) return irq; mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", @@ -464,6 +436,51 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, return irq; } +/** + * mlx5_irqs_release_vectors - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) +{ + mlx5_irqs_release(irqs, nirqs); +} + +/** + * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @cpus: CPUs array for binding the IRQs + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQ pointers. + * + * Each IRQ is bound to at most 1 CPU. + * This function requests @nirqs IRQs, starting from vector index 0. + * + * This function returns the number of IRQs requested (which might be smaller than + * @nirqs) if successful, or a negative error code in case of an error. + */ +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs) +{ + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + for (i = 0; i < nirqs; i++) { + cpumask_set_cpu(cpus[i], req_mask); + irq = mlx5_irq_request(dev, i, req_mask); + if (IS_ERR(irq)) + break; + cpumask_clear(req_mask); + irqs[i] = irq; + } + + free_cpumask_var(req_mask); + return i ? 
i : PTR_ERR(irq); +} + static struct mlx5_irq_pool * irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, u32 min_threshold, u32 max_threshold) @@ -500,6 +517,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) irq_release(irq); xa_destroy(&pool->irqs); mutex_destroy(&pool->lock); + kfree(pool->irqs_per_cpu); kvfree(pool); } @@ -547,7 +565,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) err = PTR_ERR(table->sf_comp_pool); goto err_sf_ctrl; } + + table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL); + if (!table->sf_comp_pool->irqs_per_cpu) { + err = -ENOMEM; + goto err_irqs_per_cpu; + } + return 0; + +err_irqs_per_cpu: + irq_pool_free(table->sf_comp_pool); err_sf_ctrl: irq_pool_free(table->sf_ctrl_pool); err_pf: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h new file mode 100644 index 000000000000..5c7e68bee43a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __PCI_IRQ_H__ +#define __PCI_IRQ_H__ + +#include + +#define MLX5_MAX_IRQ_NAME (32) +/* max irq_index is 2047, so four chars */ +#define MLX5_MAX_IRQ_IDX_CHARS (4) +#define MLX5_EQ_REFS_PER_IRQ (2) + +struct mlx5_irq; + +struct mlx5_irq_pool { + char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; + struct xa_limit xa_num_irqs; + struct mutex lock; /* sync IRQs creations */ + struct xarray irqs; + u32 max_threshold; + u32 min_threshold; + u16 *irqs_per_cpu; + struct mlx5_core_dev *dev; +}; + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); +static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) +{ + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity); +int mlx5_irq_get_locked(struct mlx5_irq *irq); +int mlx5_irq_read_locked(struct mlx5_irq *irq); +int mlx5_irq_put(struct mlx5_irq *irq); + +#endif /* __PCI_IRQ_H__ */ diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index ea3ff5a8ced3..3705a382276b 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -9,13 +9,13 @@ #define MLX5_NUM_SPARE_EQE (0x80) struct mlx5_eq; +struct mlx5_irq; struct mlx5_core_dev; struct mlx5_eq_param { - u8 irq_index; int nent; u64 mask[4]; - cpumask_var_t affinity; + struct mlx5_irq *irq; }; struct mlx5_eq *