linux/include/net/addrconf.h
Jiri Benc 7633c4da91 ipv6: fix race condition between ipv6_get_ifaddr and ipv6_del_addr
Although ipv6_get_ifaddr walks inet6_addr_lst under the RCU lock, it
still means hlist_for_each_entry_rcu can return an item that got removed
from the list. The memory itself of such item is not freed thanks to RCU
but nothing guarantees the actual content of the memory is sane.

In particular, the reference count can be zero. This can happen if
ipv6_del_addr is called in parallel. ipv6_del_addr removes the entry
from inet6_addr_lst (hlist_del_init_rcu(&ifp->addr_lst)) and drops all
references (__in6_ifa_put(ifp) + in6_ifa_put(ifp)). With bad enough
timing, this can happen:

1. In ipv6_get_ifaddr, hlist_for_each_entry_rcu returns an entry.

2. Then, the whole ipv6_del_addr is executed for the given entry. The
   reference count drops to zero and kfree_rcu is scheduled.

3. ipv6_get_ifaddr continues and tries to increments the reference count
   (in6_ifa_hold).

4. The rcu is unlocked and the entry is freed.

5. The freed entry is returned.

Prevent increasing of the reference count in such case. The name
in6_ifa_hold_safe is chosen to mimic the existing fib6_info_hold_safe.

[   41.506330] refcount_t: addition on 0; use-after-free.
[   41.506760] WARNING: CPU: 0 PID: 595 at lib/refcount.c:25 refcount_warn_saturate+0xa5/0x130
[   41.507413] Modules linked in: veth bridge stp llc
[   41.507821] CPU: 0 PID: 595 Comm: python3 Not tainted 6.9.0-rc2.main-00208-g49563be82afa #14
[   41.508479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
[   41.509163] RIP: 0010:refcount_warn_saturate+0xa5/0x130
[   41.509586] Code: ad ff 90 0f 0b 90 90 c3 cc cc cc cc 80 3d c0 30 ad 01 00 75 a0 c6 05 b7 30 ad 01 01 90 48 c7 c7 38 cc 7a 8c e8 cc 18 ad ff 90 <0f> 0b 90 90 c3 cc cc cc cc 80 3d 98 30 ad 01 00 0f 85 75 ff ff ff
[   41.510956] RSP: 0018:ffffbda3c026baf0 EFLAGS: 00010282
[   41.511368] RAX: 0000000000000000 RBX: ffff9e9c46914800 RCX: 0000000000000000
[   41.511910] RDX: ffff9e9c7ec29c00 RSI: ffff9e9c7ec1c900 RDI: ffff9e9c7ec1c900
[   41.512445] RBP: ffff9e9c43660c9c R08: 0000000000009ffb R09: 00000000ffffdfff
[   41.512998] R10: 00000000ffffdfff R11: ffffffff8ca58a40 R12: ffff9e9c4339a000
[   41.513534] R13: 0000000000000001 R14: ffff9e9c438a0000 R15: ffffbda3c026bb48
[   41.514086] FS:  00007fbc4cda1740(0000) GS:ffff9e9c7ec00000(0000) knlGS:0000000000000000
[   41.514726] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   41.515176] CR2: 000056233b337d88 CR3: 000000000376e006 CR4: 0000000000370ef0
[   41.515713] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   41.516252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   41.516799] Call Trace:
[   41.517037]  <TASK>
[   41.517249]  ? __warn+0x7b/0x120
[   41.517535]  ? refcount_warn_saturate+0xa5/0x130
[   41.517923]  ? report_bug+0x164/0x190
[   41.518240]  ? handle_bug+0x3d/0x70
[   41.518541]  ? exc_invalid_op+0x17/0x70
[   41.520972]  ? asm_exc_invalid_op+0x1a/0x20
[   41.521325]  ? refcount_warn_saturate+0xa5/0x130
[   41.521708]  ipv6_get_ifaddr+0xda/0xe0
[   41.522035]  inet6_rtm_getaddr+0x342/0x3f0
[   41.522376]  ? __pfx_inet6_rtm_getaddr+0x10/0x10
[   41.522758]  rtnetlink_rcv_msg+0x334/0x3d0
[   41.523102]  ? netlink_unicast+0x30f/0x390
[   41.523445]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[   41.523832]  netlink_rcv_skb+0x53/0x100
[   41.524157]  netlink_unicast+0x23b/0x390
[   41.524484]  netlink_sendmsg+0x1f2/0x440
[   41.524826]  __sys_sendto+0x1d8/0x1f0
[   41.525145]  __x64_sys_sendto+0x1f/0x30
[   41.525467]  do_syscall_64+0xa5/0x1b0
[   41.525794]  entry_SYSCALL_64_after_hwframe+0x72/0x7a
[   41.526213] RIP: 0033:0x7fbc4cfcea9a
[   41.526528] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89
[   41.527942] RSP: 002b:00007ffcf54012a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[   41.528593] RAX: ffffffffffffffda RBX: 00007ffcf5401368 RCX: 00007fbc4cfcea9a
[   41.529173] RDX: 000000000000002c RSI: 00007fbc4b9d9bd0 RDI: 0000000000000005
[   41.529786] RBP: 00007fbc4bafb040 R08: 00007ffcf54013e0 R09: 000000000000000c
[   41.530375] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[   41.530977] R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007fbc4ca85d1b
[   41.531573]  </TASK>

Fixes: 5c578aedcb ("IPv6: convert addrconf hash list to RCU")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Link: https://lore.kernel.org/r/8ab821e36073a4a406c50ec83c9e8dc586c539e4.1712585809.git.jbenc@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-04-09 17:09:05 -07:00

523 lines
14 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ADDRCONF_H
#define _ADDRCONF_H
#define MAX_RTR_SOLICITATIONS -1 /* unlimited */
#define RTR_SOLICITATION_INTERVAL (4*HZ)
#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */
#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */
#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */
#define REGEN_MIN_ADVANCE (2) /* 2 seconds */
#define REGEN_MAX_RETRY (3)
#define MAX_DESYNC_FACTOR (600)
#define ADDR_CHECK_FREQUENCY (120*HZ)
#define IPV6_MAX_ADDRESSES 16
#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ / 50 : 1)
#define ADDRCONF_TIMER_FUZZ (HZ / 4)
#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
#define ADDRCONF_NOTIFY_PRIORITY 0
#include <linux/in.h>
#include <linux/in6.h>
struct prefix_info {
__u8 type;
__u8 length;
__u8 prefix_len;
union __packed {
__u8 flags;
struct __packed {
#if defined(__BIG_ENDIAN_BITFIELD)
__u8 onlink : 1,
autoconf : 1,
reserved : 6;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
__u8 reserved : 6,
autoconf : 1,
onlink : 1;
#else
#error "Please fix <asm/byteorder.h>"
#endif
};
};
__be32 valid;
__be32 prefered;
__be32 reserved2;
struct in6_addr prefix;
};
/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */
static_assert(sizeof(struct prefix_info) == 32);
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/if_inet6.h>
#include <net/ipv6.h>
struct in6_validator_info {
struct in6_addr i6vi_addr;
struct inet6_dev *i6vi_dev;
struct netlink_ext_ack *extack;
};
struct ifa6_config {
const struct in6_addr *pfx;
unsigned int plen;
u8 ifa_proto;
const struct in6_addr *peer_pfx;
u32 rt_priority;
u32 ifa_flags;
u32 preferred_lft;
u32 valid_lft;
u16 scope;
};
int addrconf_init(void);
void addrconf_cleanup(void);
int addrconf_add_ifaddr(struct net *net, void __user *arg);
int addrconf_del_ifaddr(struct net *net, void __user *arg);
int addrconf_set_dstaddr(struct net *net, void __user *arg);
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, bool skip_dev_check,
int strict, u32 banned_flags);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
#endif
int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
unsigned char nsegs);
bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
const unsigned int prefix_len,
struct net_device *dev);
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr,
struct net_device *dev);
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
const struct in6_addr *addr,
struct net_device *dev, int strict);
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
struct in6_addr *saddr);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
bool inet_rcv_saddr_any(const struct sock *sk);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
void addrconf_add_linklocal(struct inet6_dev *idev,
const struct in6_addr *addr, u32 flags);
int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
const struct prefix_info *pinfo,
struct inet6_dev *in6_dev,
const struct in6_addr *addr, int addr_type,
u32 addr_flags, bool sllao, bool tokenized,
__u32 valid_lft, u32 prefered_lft);
static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr)
{
memcpy(eui, addr, 3);
eui[3] = 0xFF;
eui[4] = 0xFE;
memcpy(eui + 5, addr + 3, 3);
}
static inline void addrconf_addr_eui48(u8 *eui, const char *const addr)
{
addrconf_addr_eui48_base(eui, addr);
eui[0] ^= 2;
}
static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
if (dev->addr_len != ETH_ALEN)
return -1;
/*
* The zSeries OSA network cards can be shared among various
* OS instances, but the OSA cards have only one MAC address.
* This leads to duplicate address conflicts in conjunction
* with IPv6 if more than one instance uses the same card.
*
* The driver for these cards can deliver a unique 16-bit
* identifier for each instance sharing the same card. It is
* placed instead of 0xFFFE in the interface identifier. The
* "u" bit of the interface identifier is not inverted in this
* case. Hence the resulting interface identifier has local
* scope according to RFC2373.
*/
addrconf_addr_eui48_base(eui, dev->dev_addr);
if (dev->dev_id) {
eui[3] = (dev->dev_id >> 8) & 0xFF;
eui[4] = dev->dev_id & 0xFF;
} else {
eui[0] ^= 2;
}
return 0;
}
static inline unsigned long addrconf_timeout_fixup(u32 timeout,
unsigned int unit)
{
if (timeout == 0xffffffff)
return ~0UL;
/*
* Avoid arithmetic overflow.
* Assuming unit is constant and non-zero, this "if" statement
* will go away on 64bit archs.
*/
if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit)
return LONG_MAX / unit;
return timeout;
}
static inline int addrconf_finite_timeout(unsigned long timeout)
{
return ~timeout;
}
/*
* IPv6 Address Label subsystem (addrlabel.c)
*/
int ipv6_addr_label_init(void);
void ipv6_addr_label_cleanup(void);
int ipv6_addr_label_rtnl_register(void);
u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr,
int type, int ifindex);
/*
* multicast prototypes (mcast.c)
*/
static inline bool ipv6_mc_may_pull(struct sk_buff *skb,
unsigned int len)
{
if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len)
return false;
return pskb_may_pull(skb, len);
}
int ipv6_sock_mc_join(struct sock *sk, int ifindex,
const struct in6_addr *addr);
int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
void __ipv6_sock_mc_close(struct sock *sk);
void ipv6_sock_mc_close(struct sock *sk);
bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
const struct in6_addr *src_addr);
int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr);
int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr);
int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr);
void ipv6_mc_up(struct inet6_dev *idev);
void ipv6_mc_down(struct inet6_dev *idev);
void ipv6_mc_unmap(struct inet6_dev *idev);
void ipv6_mc_remap(struct inet6_dev *idev);
void ipv6_mc_init_dev(struct inet6_dev *idev);
void ipv6_mc_destroy_dev(struct inet6_dev *idev);
int ipv6_mc_check_mld(struct sk_buff *skb);
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
const struct in6_addr *src_addr);
void ipv6_mc_dad_complete(struct inet6_dev *idev);
/*
* identify MLD packets for MLD filter exceptions
*/
static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset)
{
struct icmp6hdr *hdr;
if (nexthdr != IPPROTO_ICMPV6 ||
!pskb_network_may_pull(skb, offset + sizeof(struct icmp6hdr)))
return false;
hdr = (struct icmp6hdr *)(skb_network_header(skb) + offset);
switch (hdr->icmp6_type) {
case ICMPV6_MGM_QUERY:
case ICMPV6_MGM_REPORT:
case ICMPV6_MGM_REDUCTION:
case ICMPV6_MLD2_REPORT:
return true;
default:
break;
}
return false;
}
void addrconf_prefix_rcv(struct net_device *dev,
u8 *opt, int len, bool sllao);
/*
* anycast prototypes (anycast.c)
*/
int ipv6_sock_ac_join(struct sock *sk, int ifindex,
const struct in6_addr *addr);
int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
void __ipv6_sock_ac_close(struct sock *sk);
void ipv6_sock_ac_close(struct sock *sk);
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr);
void ipv6_ac_destroy_dev(struct inet6_dev *idev);
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
int ipv6_anycast_init(void);
void ipv6_anycast_cleanup(void);
/* Device notifier */
int register_inet6addr_notifier(struct notifier_block *nb);
int unregister_inet6addr_notifier(struct notifier_block *nb);
int inet6addr_notifier_call_chain(unsigned long val, void *v);
int register_inet6addr_validator_notifier(struct notifier_block *nb);
int unregister_inet6addr_validator_notifier(struct notifier_block *nb);
int inet6addr_validator_notifier_call_chain(unsigned long val, void *v);
void inet6_netconf_notify_devconf(struct net *net, int event, int type,
int ifindex, struct ipv6_devconf *devconf);
/**
* __in6_dev_get - get inet6_dev pointer from netdevice
* @dev: network device
*
* Caller must hold rcu_read_lock or RTNL, because this function
* does not take a reference on the inet6_dev.
*/
static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
{
return rcu_dereference_rtnl(dev->ip6_ptr);
}
/**
* __in6_dev_stats_get - get inet6_dev pointer for stats
* @dev: network device
* @skb: skb for original incoming interface if neeeded
*
* Caller must hold rcu_read_lock or RTNL, because this function
* does not take a reference on the inet6_dev.
*/
static inline struct inet6_dev *__in6_dev_stats_get(const struct net_device *dev,
const struct sk_buff *skb)
{
if (netif_is_l3_master(dev))
dev = dev_get_by_index_rcu(dev_net(dev), inet6_iif(skb));
return __in6_dev_get(dev);
}
/**
* __in6_dev_get_safely - get inet6_dev pointer from netdevice
* @dev: network device
*
* This is a safer version of __in6_dev_get
*/
static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev)
{
if (likely(dev))
return rcu_dereference_rtnl(dev->ip6_ptr);
else
return NULL;
}
/**
* in6_dev_get - get inet6_dev pointer from netdevice
* @dev: network device
*
* This version can be used in any context, and takes a reference
* on the inet6_dev. Callers must use in6_dev_put() later to
* release this reference.
*/
static inline struct inet6_dev *in6_dev_get(const struct net_device *dev)
{
struct inet6_dev *idev;
rcu_read_lock();
idev = rcu_dereference(dev->ip6_ptr);
if (idev)
refcount_inc(&idev->refcnt);
rcu_read_unlock();
return idev;
}
static inline struct neigh_parms *__in6_dev_nd_parms_get_rcu(const struct net_device *dev)
{
struct inet6_dev *idev = __in6_dev_get(dev);
return idev ? idev->nd_parms : NULL;
}
void in6_dev_finish_destroy(struct inet6_dev *idev);
static inline void in6_dev_put(struct inet6_dev *idev)
{
if (refcount_dec_and_test(&idev->refcnt))
in6_dev_finish_destroy(idev);
}
static inline void in6_dev_put_clear(struct inet6_dev **pidev)
{
struct inet6_dev *idev = *pidev;
if (idev) {
in6_dev_put(idev);
*pidev = NULL;
}
}
static inline void __in6_dev_put(struct inet6_dev *idev)
{
refcount_dec(&idev->refcnt);
}
static inline void in6_dev_hold(struct inet6_dev *idev)
{
refcount_inc(&idev->refcnt);
}
/* called with rcu_read_lock held */
static inline bool ip6_ignore_linkdown(const struct net_device *dev)
{
const struct inet6_dev *idev = __in6_dev_get(dev);
if (unlikely(!idev))
return true;
return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown);
}
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
static inline void in6_ifa_put(struct inet6_ifaddr *ifp)
{
if (refcount_dec_and_test(&ifp->refcnt))
inet6_ifa_finish_destroy(ifp);
}
static inline void __in6_ifa_put(struct inet6_ifaddr *ifp)
{
refcount_dec(&ifp->refcnt);
}
static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
{
refcount_inc(&ifp->refcnt);
}
static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
{
return refcount_inc_not_zero(&ifp->refcnt);
}
/*
* compute link-local solicited-node multicast address
*/
static inline void addrconf_addr_solict_mult(const struct in6_addr *addr,
struct in6_addr *solicited)
{
ipv6_addr_set(solicited,
htonl(0xFF020000), 0,
htonl(0x1),
htonl(0xFF000000) | addr->s6_addr32[3]);
}
static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
__be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL;
#else
return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
addr->s6_addr32[1] | addr->s6_addr32[2] |
(addr->s6_addr32[3] ^ htonl(0x00000001))) == 0;
#endif
}
static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
__be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL;
#else
return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
addr->s6_addr32[1] | addr->s6_addr32[2] |
(addr->s6_addr32[3] ^ htonl(0x00000002))) == 0;
#endif
}
static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr)
{
return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE);
}
static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
__be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) |
((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) &
cpu_to_be64(0xffffffffff000000UL))) == 0UL;
#else
return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
addr->s6_addr32[1] |
(addr->s6_addr32[2] ^ htonl(0x00000001)) |
(addr->s6_addr[12] ^ 0xff)) == 0;
#endif
}
static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
__be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) |
(p[1] ^ cpu_to_be64(0x6a))) == 0UL;
#else
return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
addr->s6_addr32[1] | addr->s6_addr32[2] |
(addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0;
#endif
}
#ifdef CONFIG_PROC_FS
int if6_proc_init(void);
void if6_proc_exit(void);
#endif
#endif