linux/include/net/request_sock.h
Kuniyuki Iwashima 956c0d6191 tcp: Clear req->syncookie in reqsk_alloc().
syzkaller reported a read of uninit req->syncookie. [0]

Originally, req->syncookie was used only in tcp_conn_request()
to indicate if we need to encode SYN cookie in SYN+ACK, so the
field remains uninitialised in other places.

The commit 695751e31a ("bpf: tcp: Handle BPF SYN Cookie in
cookie_v[46]_check().") added another meaning in ACK path;
req->syncookie is set true if SYN cookie is validated by BPF
kfunc.

After the change, cookie_v[46]_check() always read req->syncookie,
but it is not initialised in the normal SYN cookie case as reported
by KMSAN.

Let's make sure we always initialise req->syncookie in reqsk_alloc().

[0]:
BUG: KMSAN: uninit-value in cookie_v4_check+0x22b7/0x29e0
 net/ipv4/syncookies.c:477
 cookie_v4_check+0x22b7/0x29e0 net/ipv4/syncookies.c:477
 tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1855 [inline]
 tcp_v4_do_rcv+0xb17/0x10b0 net/ipv4/tcp_ipv4.c:1914
 tcp_v4_rcv+0x4ce4/0x5420 net/ipv4/tcp_ipv4.c:2322
 ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205
 ip_local_deliver_finish+0x332/0x500 net/ipv4/ip_input.c:233
 NF_HOOK include/linux/netfilter.h:314 [inline]
 ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254
 dst_input include/net/dst.h:460 [inline]
 ip_rcv_finish+0x4a2/0x520 net/ipv4/ip_input.c:449
 NF_HOOK include/linux/netfilter.h:314 [inline]
 ip_rcv+0xcd/0x380 net/ipv4/ip_input.c:569
 __netif_receive_skb_one_core net/core/dev.c:5538 [inline]
 __netif_receive_skb+0x319/0x9e0 net/core/dev.c:5652
 process_backlog+0x480/0x8b0 net/core/dev.c:5981
 __napi_poll+0xe7/0x980 net/core/dev.c:6632
 napi_poll net/core/dev.c:6701 [inline]
 net_rx_action+0x89d/0x1820 net/core/dev.c:6813
 __do_softirq+0x1c0/0x7d7 kernel/softirq.c:554
 do_softirq+0x9a/0x100 kernel/softirq.c:455
 __local_bh_enable_ip+0x9f/0xb0 kernel/softirq.c:382
 local_bh_enable include/linux/bottom_half.h:33 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline]
 __dev_queue_xmit+0x2776/0x52c0 net/core/dev.c:4362
 dev_queue_xmit include/linux/netdevice.h:3091 [inline]
 neigh_hh_output include/net/neighbour.h:526 [inline]
 neigh_output include/net/neighbour.h:540 [inline]
 ip_finish_output2+0x187a/0x1b70 net/ipv4/ip_output.c:235
 __ip_finish_output+0x287/0x810
 ip_finish_output+0x4b/0x550 net/ipv4/ip_output.c:323
 NF_HOOK_COND include/linux/netfilter.h:303 [inline]
 ip_output+0x15f/0x3f0 net/ipv4/ip_output.c:433
 dst_output include/net/dst.h:450 [inline]
 ip_local_out net/ipv4/ip_output.c:129 [inline]
 __ip_queue_xmit+0x1e93/0x2030 net/ipv4/ip_output.c:535
 ip_queue_xmit+0x60/0x80 net/ipv4/ip_output.c:549
 __tcp_transmit_skb+0x3c70/0x4890 net/ipv4/tcp_output.c:1462
 tcp_transmit_skb net/ipv4/tcp_output.c:1480 [inline]
 tcp_write_xmit+0x3ee1/0x8900 net/ipv4/tcp_output.c:2792
 __tcp_push_pending_frames net/ipv4/tcp_output.c:2977 [inline]
 tcp_send_fin+0xa90/0x12e0 net/ipv4/tcp_output.c:3578
 tcp_shutdown+0x198/0x1f0 net/ipv4/tcp.c:2716
 inet_shutdown+0x33f/0x5b0 net/ipv4/af_inet.c:923
 __sys_shutdown_sock net/socket.c:2425 [inline]
 __sys_shutdown net/socket.c:2437 [inline]
 __do_sys_shutdown net/socket.c:2445 [inline]
 __se_sys_shutdown+0x2a4/0x440 net/socket.c:2443
 __x64_sys_shutdown+0x6c/0xa0 net/socket.c:2443
 do_syscall_64+0xd5/0x1f0
 entry_SYSCALL_64_after_hwframe+0x6d/0x75

Uninit was stored to memory at:
 reqsk_alloc include/net/request_sock.h:148 [inline]
 inet_reqsk_alloc+0x651/0x7a0 net/ipv4/tcp_input.c:6978
 cookie_tcp_reqsk_alloc+0xd4/0x900 net/ipv4/syncookies.c:328
 cookie_tcp_check net/ipv4/syncookies.c:388 [inline]
 cookie_v4_check+0x289f/0x29e0 net/ipv4/syncookies.c:420
 tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1855 [inline]
 tcp_v4_do_rcv+0xb17/0x10b0 net/ipv4/tcp_ipv4.c:1914
 tcp_v4_rcv+0x4ce4/0x5420 net/ipv4/tcp_ipv4.c:2322
 ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205
 ip_local_deliver_finish+0x332/0x500 net/ipv4/ip_input.c:233
 NF_HOOK include/linux/netfilter.h:314 [inline]
 ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254
 dst_input include/net/dst.h:460 [inline]
 ip_rcv_finish+0x4a2/0x520 net/ipv4/ip_input.c:449
 NF_HOOK include/linux/netfilter.h:314 [inline]
 ip_rcv+0xcd/0x380 net/ipv4/ip_input.c:569
 __netif_receive_skb_one_core net/core/dev.c:5538 [inline]
 __netif_receive_skb+0x319/0x9e0 net/core/dev.c:5652
 process_backlog+0x480/0x8b0 net/core/dev.c:5981
 __napi_poll+0xe7/0x980 net/core/dev.c:6632
 napi_poll net/core/dev.c:6701 [inline]
 net_rx_action+0x89d/0x1820 net/core/dev.c:6813
 __do_softirq+0x1c0/0x7d7 kernel/softirq.c:554

Uninit was created at:
 __alloc_pages+0x9a7/0xe00 mm/page_alloc.c:4592
 __alloc_pages_node include/linux/gfp.h:238 [inline]
 alloc_pages_node include/linux/gfp.h:261 [inline]
 alloc_slab_page mm/slub.c:2175 [inline]
 allocate_slab mm/slub.c:2338 [inline]
 new_slab+0x2de/0x1400 mm/slub.c:2391
 ___slab_alloc+0x1184/0x33d0 mm/slub.c:3525
 __slab_alloc mm/slub.c:3610 [inline]
 __slab_alloc_node mm/slub.c:3663 [inline]
 slab_alloc_node mm/slub.c:3835 [inline]
 kmem_cache_alloc+0x6d3/0xbe0 mm/slub.c:3852
 reqsk_alloc include/net/request_sock.h:131 [inline]
 inet_reqsk_alloc+0x66/0x7a0 net/ipv4/tcp_input.c:6978
 tcp_conn_request+0x484/0x44e0 net/ipv4/tcp_input.c:7135
 tcp_v4_conn_request+0x16f/0x1d0 net/ipv4/tcp_ipv4.c:1716
 tcp_rcv_state_process+0x2e5/0x4bb0 net/ipv4/tcp_input.c:6655
 tcp_v4_do_rcv+0xbfd/0x10b0 net/ipv4/tcp_ipv4.c:1929
 tcp_v4_rcv+0x4ce4/0x5420 net/ipv4/tcp_ipv4.c:2322
 ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205
 ip_local_deliver_finish+0x332/0x500 net/ipv4/ip_input.c:233
 NF_HOOK include/linux/netfilter.h:314 [inline]
 ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254
 dst_input include/net/dst.h:460 [inline]
 ip_sublist_rcv_finish net/ipv4/ip_input.c:580 [inline]
 ip_list_rcv_finish net/ipv4/ip_input.c:631 [inline]
 ip_sublist_rcv+0x15f3/0x17f0 net/ipv4/ip_input.c:639
 ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:674
 __netif_receive_skb_list_ptype net/core/dev.c:5581 [inline]
 __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5629
 __netif_receive_skb_list net/core/dev.c:5681 [inline]
 netif_receive_skb_list_internal+0x106c/0x16f0 net/core/dev.c:5773
 gro_normal_list include/net/gro.h:438 [inline]
 napi_complete_done+0x425/0x880 net/core/dev.c:6113
 virtqueue_napi_complete drivers/net/virtio_net.c:465 [inline]
 virtnet_poll+0x149d/0x2240 drivers/net/virtio_net.c:2211
 __napi_poll+0xe7/0x980 net/core/dev.c:6632
 napi_poll net/core/dev.c:6701 [inline]
 net_rx_action+0x89d/0x1820 net/core/dev.c:6813
 __do_softirq+0x1c0/0x7d7 kernel/softirq.c:554

CPU: 0 PID: 16792 Comm: syz-executor.2 Not tainted 6.8.0-syzkaller-05562-g61387b8dcf1d #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024

Fixes: 695751e31a ("bpf: tcp: Handle BPF SYN Cookie in cookie_v[46]_check().")
Reported-by: syzkaller <syzkaller@googlegroups.com>
Reported-by: Eric Dumazet <edumazet@google.com>
Closes: https://lore.kernel.org/bpf/CANn89iKdN9c+C_2JAUbc+VY3DDQjAQukMtiBbormAmAk9CdvQA@mail.gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240315224710.55209-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-03-19 19:35:59 -07:00

286 lines
7.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* NET Generic infrastructure for Network protocols.
*
* Definitions for request_sock
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* From code originally in include/net/tcp.h
*/
#ifndef _REQUEST_SOCK_H
#define _REQUEST_SOCK_H
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/refcount.h>
#include <net/sock.h>
struct request_sock;
struct sk_buff;
struct dst_entry;
struct proto;
struct request_sock_ops {
int family;
unsigned int obj_size;
struct kmem_cache *slab;
char *slab_name;
int (*rtx_syn_ack)(const struct sock *sk,
struct request_sock *req);
void (*send_ack)(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void (*send_reset)(const struct sock *sk,
struct sk_buff *skb);
void (*destructor)(struct request_sock *req);
void (*syn_ack_timeout)(const struct request_sock *req);
};
int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);
struct saved_syn {
u32 mac_hdrlen;
u32 network_hdrlen;
u32 tcp_hdrlen;
u8 data[];
};
/* struct request_sock - mini sock to represent a connection request
*/
struct request_sock {
struct sock_common __req_common;
#define rsk_refcnt __req_common.skc_refcnt
#define rsk_hash __req_common.skc_hash
#define rsk_listener __req_common.skc_listener
#define rsk_window_clamp __req_common.skc_window_clamp
#define rsk_rcv_wnd __req_common.skc_rcv_wnd
struct request_sock *dl_next;
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 syncookie:1; /* True if
* 1) tcpopts needs to be encoded in
* TS of SYN+ACK
* 2) ACK is validated by BPF kfunc.
*/
u8 num_timeout:7; /* number of timeouts */
u32 ts_recent;
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
struct sock *sk;
struct saved_syn *saved_syn;
u32 secid;
u32 peer_secid;
u32 timeout;
};
static inline struct request_sock *inet_reqsk(const struct sock *sk)
{
return (struct request_sock *)sk;
}
static inline struct sock *req_to_sk(struct request_sock *req)
{
return (struct sock *)req;
}
/**
* skb_steal_sock - steal a socket from an sk_buff
* @skb: sk_buff to steal the socket from
* @refcounted: is set to true if the socket is reference-counted
* @prefetched: is set to true if the socket was assigned from bpf
*/
static inline struct sock *skb_steal_sock(struct sk_buff *skb,
bool *refcounted, bool *prefetched)
{
struct sock *sk = skb->sk;
if (!sk) {
*prefetched = false;
*refcounted = false;
return NULL;
}
*prefetched = skb_sk_is_prefetched(skb);
if (*prefetched) {
#if IS_ENABLED(CONFIG_SYN_COOKIES)
if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
struct request_sock *req = inet_reqsk(sk);
*refcounted = false;
sk = req->rsk_listener;
req->rsk_listener = NULL;
return sk;
}
#endif
*refcounted = sk_is_refcounted(sk);
} else {
*refcounted = true;
}
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
static inline struct request_sock *
reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
bool attach_listener)
{
struct request_sock *req;
req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
if (!req)
return NULL;
req->rsk_listener = NULL;
if (attach_listener) {
if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
kmem_cache_free(ops->slab, req);
return NULL;
}
req->rsk_listener = sk_listener;
}
req->rsk_ops = ops;
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
req->syncookie = 0;
req->timeout = 0;
req->num_timeout = 0;
req->num_retrans = 0;
req->sk = NULL;
refcount_set(&req->rsk_refcnt, 0);
return req;
}
static inline void __reqsk_free(struct request_sock *req)
{
req->rsk_ops->destructor(req);
if (req->rsk_listener)
sock_put(req->rsk_listener);
kfree(req->saved_syn);
kmem_cache_free(req->rsk_ops->slab, req);
}
static inline void reqsk_free(struct request_sock *req)
{
WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
__reqsk_free(req);
}
static inline void reqsk_put(struct request_sock *req)
{
if (refcount_dec_and_test(&req->rsk_refcnt))
reqsk_free(req);
}
/*
* For a TCP Fast Open listener -
* lock - protects the access to all the reqsk, which is co-owned by
* the listener and the child socket.
* qlen - pending TFO requests (still in TCP_SYN_RECV).
* max_qlen - max TFO reqs allowed before TFO is disabled.
*
* XXX (TFO) - ideally these fields can be made as part of "listen_sock"
* structure above. But there is some implementation difficulty due to
* listen_sock being part of request_sock_queue hence will be freed when
* a listener is stopped. But TFO related fields may continue to be
* accessed even after a listener is closed, until its sk_refcnt drops
* to 0 implying no more outstanding TFO reqs. One solution is to keep
* listen_opt around until sk_refcnt drops to 0. But there is some other
* complexity that needs to be resolved. E.g., a listener can be disabled
* temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
*/
struct fastopen_queue {
struct request_sock *rskq_rst_head; /* Keep track of past TFO */
struct request_sock *rskq_rst_tail; /* requests that caused RST.
* This is part of the defense
* against spoofing attack.
*/
spinlock_t lock;
int qlen; /* # of pending (TCP_SYN_RECV) reqs */
int max_qlen; /* != 0 iff TFO is currently enabled */
struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */
};
/** struct request_sock_queue - queue of request_socks
*
* @rskq_accept_head - FIFO head of established children
* @rskq_accept_tail - FIFO tail of established children
* @rskq_defer_accept - User waits for some data after accept()
*
*/
struct request_sock_queue {
spinlock_t rskq_lock;
u8 rskq_defer_accept;
u32 synflood_warned;
atomic_t qlen;
atomic_t young;
struct request_sock *rskq_accept_head;
struct request_sock *rskq_accept_tail;
struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine
* if TFO is enabled.
*/
};
void reqsk_queue_alloc(struct request_sock_queue *queue);
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
bool reset);
static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
{
return READ_ONCE(queue->rskq_accept_head) == NULL;
}
static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
struct sock *parent)
{
struct request_sock *req;
spin_lock_bh(&queue->rskq_lock);
req = queue->rskq_accept_head;
if (req) {
sk_acceptq_removed(parent);
WRITE_ONCE(queue->rskq_accept_head, req->dl_next);
if (queue->rskq_accept_head == NULL)
queue->rskq_accept_tail = NULL;
}
spin_unlock_bh(&queue->rskq_lock);
return req;
}
static inline void reqsk_queue_removed(struct request_sock_queue *queue,
const struct request_sock *req)
{
if (req->num_timeout == 0)
atomic_dec(&queue->young);
atomic_dec(&queue->qlen);
}
static inline void reqsk_queue_added(struct request_sock_queue *queue)
{
atomic_inc(&queue->young);
atomic_inc(&queue->qlen);
}
static inline int reqsk_queue_len(const struct request_sock_queue *queue)
{
return atomic_read(&queue->qlen);
}
static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
{
return atomic_read(&queue->young);
}
#endif /* _REQUEST_SOCK_H */