Merge branch 'mld-sleepable'

Taehee Yoo says:

====================
mld: change context from atomic to sleepable

This patchset changes the context of MLD module.
Before this patchset, MLD functions are atomic context so it couldn't use
sleepable functions and flags.

There are several reasons why MLD functions are under atomic context.
1. It uses timer API.
Timer expiration functions are executed in the atomic context.
2. atomic locks
MLD functions use rwlock and spinlock to protect their own resources.

So, in order to switch context, this patchset converts resources to use
RCU and removes atomic locks and timer API.

1. The first patch convert from the timer API to delayed work.
Timer API is used for delaying some works.
MLD protocol has a delay mechanism, which is used for replying to a query.
If a listener receives a query from a router, it should send a response
after some delay. But because of timer expire function is executed in
the atomic context, this patch convert from timer API to the delayed work.

2. The fourth patch deletes inet6_dev->mc_lock.
The mc_lock has protected inet6_dev->mc_tomb pointer.
But this pointer is already protected by RTNL and it isn't be used by
datapath. So, it isn't be needed and because of this, many atomic context
critical sections are deleted.

3. The fifth patch convert ip6_sf_socklist to RCU.
ip6_sf_socklist has been protected by ipv6_mc_socklist->sflock(rwlock).
But this is already protected by RTNL So if it is converted to use RCU
in order to be used in the datapath, the sflock is no more needed.
So, its control path context can be switched to sleepable.

4. The sixth patch convert ip6_sf_list to RCU.
The reason for this patch is the same as the previous patch.

5. The seventh patch convert ifmcaddr6 to RCU.
The reason for this patch is the same as the previous patch.

6. Add new workqueues for processing query/report event.
By this patch, query and report events are processed by workqueue
So context is sleepable, not atomic.
While this logic, it acquires RTNL.

7. Add new mc_lock.
The purpose of this lock is to protect per-interface mld data.
Per-interface mld data is usually used by query/report event handler.
So, query/report event workers need only this lock instead of RTNL.
Therefore, it could reduce bottleneck.

Changelog:
v2 -> v3:
1. Do not use msecs_to_jiffies().
(by Cong Wang)
2. Do not add unnecessary rtnl_lock() and rtnl_unlock().
(by Cong Wang)
3. Fix sparse warnings because of rcu annotation.
(by kernel test robot)
   - Remove some rcu_assign_pointer(), which was used for non-rcu pointer.
   - Add union for rcu pointer.
   - Use rcu API in mld_clear_zeros().
   - Remove remained rcu_read_unlock().
   - Use rcu API for tomb resources.
4. withdraw prevopus 2nd and 3rd patch.
   - "separate two flags from ifmcaddr6->mca_flags"
   - "add a new delayed_work, mc_delrec_work"
5. Add 6th and 7th patch.

v1 -> v2:
1. Withdraw unnecessary refactoring patches.
(by Cong Wang, Eric Dumazet, David Ahern)
    a) convert from array to list.
    b) function rename.
2. Separate big one patch into small several patches.
3. Do not rename 'ifmcaddr6->mca_lock'.
In the v1 patch, this variable was changed to 'ifmcaddr6->mca_work_lock'.
But this is actually not needed.
4. Do not use atomic_t for 'ifmcaddr6->mca_sfcount' and
'ipv6_mc_socklist'->sf_count'.
5. Do not add mld_check_leave_group() function.
6. Do not add ip6_mc_del_src_bulk() function.
7. Do not add ip6_mc_add_src_bulk() function.
8. Do not use rcu_read_lock() in the qeth_l3_add_mcast_rtnl().
(by Julian Wiedmann)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-03-26 15:14:57 -07:00
commit 32bc7a2cca
9 changed files with 678 additions and 471 deletions

View file

@ -1098,8 +1098,9 @@ static int qeth_l3_add_mcast_rtnl(struct net_device *dev, int vid, void *arg)
tmp.disp_flag = QETH_DISP_ADDR_ADD;
tmp.is_multicast = 1;
read_lock_bh(&in6_dev->lock);
for (im6 = in6_dev->mc_list; im6 != NULL; im6 = im6->next) {
for (im6 = rtnl_dereference(in6_dev->mc_list);
im6;
im6 = rtnl_dereference(im6->next)) {
tmp.u.a6.addr = im6->mca_addr;
ipm = qeth_l3_find_addr_by_ip(card, &tmp);
@ -1117,7 +1118,6 @@ static int qeth_l3_add_mcast_rtnl(struct net_device *dev, int vid, void *arg)
qeth_l3_ipaddr_hash(ipm));
}
read_unlock_bh(&in6_dev->lock);
out:
return 0;

View file

@ -78,6 +78,7 @@ struct inet6_ifaddr {
struct ip6_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
struct rcu_head rcu;
struct in6_addr sl_addr[];
};
@ -91,18 +92,18 @@ struct ipv6_mc_socklist {
int ifindex;
unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */
struct ipv6_mc_socklist __rcu *next;
rwlock_t sflock;
struct ip6_sf_socklist *sflist;
struct ip6_sf_socklist __rcu *sflist;
struct rcu_head rcu;
};
struct ip6_sf_list {
struct ip6_sf_list *sf_next;
struct ip6_sf_list __rcu *sf_next;
struct in6_addr sf_addr;
unsigned long sf_count[2]; /* include/exclude counts */
unsigned char sf_gsresp; /* include in g & s response? */
unsigned char sf_oldin; /* change state */
unsigned char sf_crcount; /* retrans. left to send */
struct rcu_head rcu;
};
#define MAF_TIMER_RUNNING 0x01
@ -114,19 +115,19 @@ struct ip6_sf_list {
struct ifmcaddr6 {
struct in6_addr mca_addr;
struct inet6_dev *idev;
struct ifmcaddr6 *next;
struct ip6_sf_list *mca_sources;
struct ip6_sf_list *mca_tomb;
struct ifmcaddr6 __rcu *next;
struct ip6_sf_list __rcu *mca_sources;
struct ip6_sf_list __rcu *mca_tomb;
unsigned int mca_sfmode;
unsigned char mca_crcount;
unsigned long mca_sfcount[2];
struct timer_list mca_timer;
struct delayed_work mca_work;
unsigned int mca_flags;
int mca_users;
refcount_t mca_refcnt;
spinlock_t mca_lock;
unsigned long mca_cstamp;
unsigned long mca_tstamp;
struct rcu_head rcu;
};
/* Anycast stuff */
@ -165,9 +166,8 @@ struct inet6_dev {
struct list_head addr_list;
struct ifmcaddr6 *mc_list;
struct ifmcaddr6 *mc_tomb;
spinlock_t mc_lock;
struct ifmcaddr6 __rcu *mc_list;
struct ifmcaddr6 __rcu *mc_tomb;
unsigned char mc_qrv; /* Query Robustness Variable */
unsigned char mc_gq_running;
@ -179,9 +179,18 @@ struct inet6_dev {
unsigned long mc_qri; /* Query Response Interval */
unsigned long mc_maxdelay;
struct timer_list mc_gq_timer; /* general query timer */
struct timer_list mc_ifc_timer; /* interface change timer */
struct timer_list mc_dad_timer; /* dad complete mc timer */
struct delayed_work mc_gq_work; /* general query work */
struct delayed_work mc_ifc_work; /* interface change work */
struct delayed_work mc_dad_work; /* dad complete mc work */
struct delayed_work mc_query_work; /* mld query work */
struct delayed_work mc_report_work; /* mld report work */
struct sk_buff_head mc_query_queue; /* mld query queue */
struct sk_buff_head mc_report_queue; /* mld report queue */
spinlock_t mc_query_lock; /* mld query queue lock */
spinlock_t mc_report_lock; /* mld query report lock */
struct mutex mc_lock; /* mld global lock */
struct ifacaddr6 *ac_list;
rwlock_t lock;

View file

@ -92,6 +92,9 @@ struct mld2_query {
#define MLD_EXP_MIN_LIMIT 32768UL
#define MLDV1_MRD_MAX_COMPAT (MLD_EXP_MIN_LIMIT - 1)
#define MLD_MAX_QUEUE 8
#define MLD_MAX_SKBS 32
static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
{
/* RFC3810, 5.1.3. Maximum Response Code */

View file

@ -454,8 +454,9 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
return 0;
}
read_lock_bh(&in6_dev->lock);
for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) {
for (pmc6 = rcu_dereference(in6_dev->mc_list);
pmc6;
pmc6 = rcu_dereference(pmc6->next)) {
if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) <
IPV6_ADDR_SCOPE_LINKLOCAL)
continue;
@ -484,7 +485,6 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
hlist_add_head(&new->list, mcast_list);
ret++;
}
read_unlock_bh(&in6_dev->lock);
rcu_read_unlock();
return ret;

View file

@ -5107,17 +5107,20 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
break;
}
case MULTICAST_ADDR:
read_unlock_bh(&idev->lock);
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
for (ifmca = idev->mc_list; ifmca;
ifmca = ifmca->next, ip_idx++) {
for (ifmca = rcu_dereference(idev->mc_list);
ifmca;
ifmca = rcu_dereference(ifmca->next), ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
if (err < 0)
break;
}
read_lock_bh(&idev->lock);
break;
case ANYCAST_ADDR:
fillargs->event = RTM_GETANYCAST;
@ -6093,10 +6096,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
rcu_read_lock_bh();
if (likely(ifp->idev->dead == 0))
__ipv6_ifa_notify(event, ifp);
rcu_read_unlock_bh();
}
#ifdef CONFIG_SYSCTL

View file

@ -250,7 +250,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
struct net_device *dev = idev->dev;
WARN_ON(!list_empty(&idev->addr_list));
WARN_ON(idev->mc_list);
WARN_ON(rcu_access_pointer(idev->mc_list));
WARN_ON(timer_pending(&idev->rs_timer));
#ifdef NET_REFCNT_DEBUG

View file

@ -222,7 +222,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
inet->mc_loop = 1;
inet->mc_ttl = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
RCU_INIT_POINTER(inet->mc_list, NULL);
inet->rcv_tos = 0;
if (net->ipv4.sysctl_ip_no_pmtu_disc)

View file

@ -944,11 +944,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
case ICMPV6_MGM_QUERY:
igmp6_event_query(skb);
break;
return 0;
case ICMPV6_MGM_REPORT:
igmp6_event_report(skb);
break;
return 0;
case ICMPV6_MGM_REDUCTION:
case ICMPV6_NI_QUERY:

File diff suppressed because it is too large Load diff