linux/net/devlink/devl_internal.h
Jiri Pirko e93c9378e3 devlink: change per-devlink netdev notifier to static one
The commit 565b4824c3 ("devlink: change port event netdev notifier
from per-net to global") changed original per-net notifier to be
per-devlink instance. That fixed the issue of non-receiving events
of netdev uninit if that moved to a different namespace.
That worked fine in -net tree.

However, later on when commit ee75f1fc44 ("net/mlx5e: Create
separate devlink instance for ethernet auxiliary device") and
commit 72ed5d5624 ("net/mlx5: Suspend auxiliary devices only in
case of PCI device suspend") were merged, a deadlock was introduced
when removing a namespace with devlink instance with another nested
instance.

Here there is the bad flow example resulting in deadlock with mlx5:
net_cleanup_work -> cleanup_net (takes down_read(&pernet_ops_rwsem) ->
devlink_pernet_pre_exit() -> devlink_reload() ->
mlx5_devlink_reload_down() -> mlx5_unload_one_devl_locked() ->
mlx5_detach_device() -> del_adev() -> mlx5e_remove() ->
mlx5e_destroy_devlink() -> devlink_free() ->
unregister_netdevice_notifier() (takes down_write(&pernet_ops_rwsem)

Steps to reproduce:
$ modprobe mlx5_core
$ ip netns add ns1
$ devlink dev reload pci/0000:08:00.0 netns ns1
$ ip netns del ns1

Resolve this by converting the notifier from per-devlink instance to
a static one registered during init phase and leaving it registered
forever. Use this notifier for all devlink port instances created
later on.

Note what a tree needs this fix only in case all of the cited fixes
commits are present.

Reported-by: Moshe Shemesh <moshe@nvidia.com>
Fixes: 565b4824c3 ("devlink: change port event netdev notifier from per-net to global")
Fixes: ee75f1fc44 ("net/mlx5e: Create separate devlink instance for ethernet auxiliary device")
Fixes: 72ed5d5624 ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend")
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230510144621.932017-1-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-05-11 18:06:50 -07:00

239 lines
8.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>
#include <net/devlink.h>
#include <net/net_namespace.h>
#define DEVLINK_REGISTERED XA_MARK_1
#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
(__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
struct devlink_dev_stats {
u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
};
struct devlink {
u32 index;
struct xarray ports;
struct list_head rate_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
struct list_head resource_list;
struct xarray params;
struct list_head region_list;
struct list_head reporter_list;
struct devlink_dpipe_headers *dpipe_headers;
struct list_head trap_list;
struct list_head trap_group_list;
struct list_head trap_policer_list;
struct list_head linecard_list;
const struct devlink_ops *ops;
struct xarray snapshot_ids;
struct devlink_dev_stats stats;
struct device *dev;
possible_net_t _net;
/* Serializes access to devlink instance specific objects such as
* port, sb, dpipe, resource, params, region, traps and more.
*/
struct mutex lock;
struct lock_class_key lock_key;
u8 reload_failed:1;
refcount_t refcount;
struct rcu_work rwork;
char priv[] __aligned(NETDEV_ALIGN);
};
extern struct xarray devlinks;
extern struct genl_family devlink_nl_family;
/* devlink instances are open to the access from the user space after
* devlink_register() call. Such logical barrier allows us to have certain
* expectations related to locking.
*
* Before *_register() - we are in initialization stage and no parallel
* access possible to the devlink instance. All drivers perform that phase
* by implicitly holding device_lock.
*
* After *_register() - users and driver can access devlink instance at
* the same time.
*/
#define ASSERT_DEVLINK_REGISTERED(d) \
WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
/* Iterate over devlink pointers which were possible to get reference to.
* devlink_put() needs to be called for each iterated devlink pointer
* in loop body in order to release the reference.
*/
#define devlinks_xa_for_each_registered_get(net, index, devlink) \
for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++)
struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp);
static inline bool devl_is_registered(struct devlink *devlink)
{
devl_assert_locked(devlink);
return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
}
/* Netlink */
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
};
/* state held across netlink dumps */
struct devlink_nl_dump_state {
unsigned long instance;
int idx;
union {
/* DEVLINK_CMD_REGION_READ */
struct {
u64 start_offset;
};
/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET */
struct {
u64 dump_ts;
};
};
};
struct devlink_cmd {
int (*dump_one)(struct sk_buff *msg, struct devlink *devlink,
struct netlink_callback *cb);
};
extern const struct genl_small_ops devlink_nl_ops[56];
struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
void devlink_notify_unregister(struct devlink *devlink);
void devlink_notify_register(struct devlink *devlink);
int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
struct netlink_callback *cb);
static inline struct devlink_nl_dump_state *
devlink_dump_state(struct netlink_callback *cb)
{
NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state);
return (struct devlink_nl_dump_state *)cb->ctx;
}
static inline int
devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
{
if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
return -EMSGSIZE;
if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
return -EMSGSIZE;
return 0;
}
/* Commands */
extern const struct devlink_cmd devl_cmd_get;
extern const struct devlink_cmd devl_cmd_port_get;
extern const struct devlink_cmd devl_cmd_sb_get;
extern const struct devlink_cmd devl_cmd_sb_pool_get;
extern const struct devlink_cmd devl_cmd_sb_port_pool_get;
extern const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get;
extern const struct devlink_cmd devl_cmd_param_get;
extern const struct devlink_cmd devl_cmd_region_get;
extern const struct devlink_cmd devl_cmd_info_get;
extern const struct devlink_cmd devl_cmd_health_reporter_get;
extern const struct devlink_cmd devl_cmd_trap_get;
extern const struct devlink_cmd devl_cmd_trap_group_get;
extern const struct devlink_cmd devl_cmd_trap_policer_get;
extern const struct devlink_cmd devl_cmd_rate_get;
extern const struct devlink_cmd devl_cmd_linecard_get;
extern const struct devlink_cmd devl_cmd_selftests_get;
/* Notify */
void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
/* Ports */
int devlink_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr);
struct devlink_port *
devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
struct nlattr **attrs);
/* Reload */
bool devlink_reload_actions_valid(const struct devlink_ops *ops);
int devlink_reload(struct devlink *devlink, struct net *dest_net,
enum devlink_reload_action action,
enum devlink_reload_limit limit,
u32 *actions_performed, struct netlink_ext_ack *extack);
static inline bool devlink_reload_supported(const struct devlink_ops *ops)
{
return ops->reload_down && ops->reload_up;
}
/* Params */
void devlink_params_driverinit_load_new(struct devlink *devlink);
/* Resources */
struct devlink_resource;
int devlink_resources_validate(struct devlink *devlink,
struct devlink_resource *resource,
struct genl_info *info);
/* Line cards */
struct devlink_linecard;
struct devlink_linecard *
devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
/* Rates */
int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
struct devlink_rate *
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
struct devlink_rate *
devlink_rate_node_get_from_info(struct devlink *devlink,
struct genl_info *info);
/* Devlink nl cmds */
int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
struct genl_info *info);