linux/net/smc/smc_close.c
Ursula Braun 5fb8642a17 net/smc: fix sock refcounting in case of termination
When an ISM device is removed, all its linkgroups are terminated,
i.e. all the corresponding connections are killed.
Connection killing invokes smc_close_active_abort(), which decreases
the sock refcount for certain states to simulate passive closing.
And it cancels the close worker and has to give up the sock lock for
this timeframe. This opens the door for a passive close worker or a
socket close to run in between. In this case smc_close_active_abort() and
passive close worker resp. smc_release() might do a sock_put for passive
closing. This causes:

[ 1323.315943] refcount_t: underflow; use-after-free.
[ 1323.316055] WARNING: CPU: 3 PID: 54469 at lib/refcount.c:28 refcount_warn_saturate+0xe8/0x130
[ 1323.316069] Kernel panic - not syncing: panic_on_warn set ...
[ 1323.316084] CPU: 3 PID: 54469 Comm: uperf Not tainted 5.9.0-20200826.rc2.git0.46328853ed20.300.fc32.s390x+debug #1
[ 1323.316096] Hardware name: IBM 2964 NC9 702 (z/VM 6.4.0)
[ 1323.316108] Call Trace:
[ 1323.316125]  [<00000000c0d4aae8>] show_stack+0x90/0xf8
[ 1323.316143]  [<00000000c15989b0>] dump_stack+0xa8/0xe8
[ 1323.316158]  [<00000000c0d8344e>] panic+0x11e/0x288
[ 1323.316173]  [<00000000c0d83144>] __warn+0xac/0x158
[ 1323.316187]  [<00000000c1597a7a>] report_bug+0xb2/0x130
[ 1323.316201]  [<00000000c0d36424>] monitor_event_exception+0x44/0xc0
[ 1323.316219]  [<00000000c195c716>] pgm_check_handler+0x1da/0x238
[ 1323.316234]  [<00000000c151844c>] refcount_warn_saturate+0xec/0x130
[ 1323.316280] ([<00000000c1518448>] refcount_warn_saturate+0xe8/0x130)
[ 1323.316310]  [<000003ff801f2e2a>] smc_release+0x192/0x1c8 [smc]
[ 1323.316323]  [<00000000c169f1fa>] __sock_release+0x5a/0xe0
[ 1323.316334]  [<00000000c169f2ac>] sock_close+0x2c/0x40
[ 1323.316350]  [<00000000c1086de0>] __fput+0xb8/0x278
[ 1323.316362]  [<00000000c0db1e0e>] task_work_run+0x76/0xb8
[ 1323.316393]  [<00000000c0d8ab84>] do_exit+0x26c/0x520
[ 1323.316408]  [<00000000c0d8af08>] do_group_exit+0x48/0xc0
[ 1323.316421]  [<00000000c0d8afa8>] __s390x_sys_exit_group+0x28/0x38
[ 1323.316433]  [<00000000c195c32c>] system_call+0xe0/0x2b4
[ 1323.316446] 1 lock held by uperf/54469:
[ 1323.316456]  #0: 0000000044125e60 (&sb->s_type->i_mutex_key#9){+.+.}-{3:3}, at: __sock_release+0x44/0xe0

The patch rechecks sock state in smc_close_active_abort() after
smc_close_cancel_work() to avoid duplicate decrease of sock
refcount for the same purpose.

Fixes: 611b63a127 ("net/smc: cancel tx worker in case of socket aborts")
Reviewed-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-03 16:52:33 -07:00

490 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* Socket Closing - normal and abnormal
*
* Copyright IBM Corp. 2016
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#include <linux/workqueue.h>
#include <linux/sched/signal.h>
#include <net/sock.h>
#include <net/tcp.h>
#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"
/* release the clcsock that is assigned to the smc_sock */
void smc_clcsock_release(struct smc_sock *smc)
{
struct socket *tcp;
if (smc->listen_smc && current_work() != &smc->smc_listen_work)
cancel_work_sync(&smc->smc_listen_work);
mutex_lock(&smc->clcsock_release_lock);
if (smc->clcsock) {
tcp = smc->clcsock;
smc->clcsock = NULL;
sock_release(tcp);
}
mutex_unlock(&smc->clcsock_release_lock);
}
static void smc_close_cleanup_listen(struct sock *parent)
{
struct sock *sk;
/* Close non-accepted connections */
while ((sk = smc_accept_dequeue(parent, NULL)))
smc_close_non_accepted(sk);
}
/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = &smc->sk;
if (!timeout)
return;
if (!smc_tx_prepared_sends(&smc->conn))
return;
smc->wait_close_tx_prepared = 1;
add_wait_queue(sk_sleep(sk), &wait);
while (!signal_pending(current) && timeout) {
int rc;
rc = sk_wait_event(sk, &timeout,
!smc_tx_prepared_sends(&smc->conn) ||
sk->sk_err == ECONNABORTED ||
sk->sk_err == ECONNRESET ||
smc->conn.killed,
&wait);
if (rc)
break;
}
remove_wait_queue(sk_sleep(sk), &wait);
smc->wait_close_tx_prepared = 0;
}
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
if (smc->wait_close_tx_prepared)
/* wake up socket closing */
smc->sk.sk_state_change(&smc->sk);
}
static int smc_close_wr(struct smc_connection *conn)
{
conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
return smc_cdc_get_slot_and_msg_send(conn);
}
static int smc_close_final(struct smc_connection *conn)
{
if (atomic_read(&conn->bytes_to_rcv))
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
else
conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
if (conn->killed)
return -EPIPE;
return smc_cdc_get_slot_and_msg_send(conn);
}
int smc_close_abort(struct smc_connection *conn)
{
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
return smc_cdc_get_slot_and_msg_send(conn);
}
static void smc_close_cancel_work(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
release_sock(sk);
cancel_work_sync(&smc->conn.close_work);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
}
/* terminate smc socket abnormally - active abort
* link group is terminated, i.e. RDMA communication no longer possible
*/
void smc_close_active_abort(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
bool release_clcsock = false;
if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
}
switch (sk->sk_state) {
case SMC_ACTIVE:
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
sk->sk_state = SMC_PEERABORTWAIT;
smc_close_cancel_work(smc);
if (sk->sk_state != SMC_PEERABORTWAIT)
break;
sk->sk_state = SMC_CLOSED;
sock_put(sk); /* (postponed) passive closing */
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
sk->sk_state = SMC_PEERABORTWAIT;
smc_close_cancel_work(smc);
if (sk->sk_state != SMC_PEERABORTWAIT)
break;
sk->sk_state = SMC_CLOSED;
smc_conn_free(&smc->conn);
release_clcsock = true;
sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PEERABORTWAIT;
smc_close_cancel_work(smc);
if (sk->sk_state != SMC_PEERABORTWAIT)
break;
sk->sk_state = SMC_CLOSED;
smc_conn_free(&smc->conn);
release_clcsock = true;
break;
case SMC_INIT:
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
}
sock_set_flag(sk, SOCK_DEAD);
sk->sk_state_change(sk);
if (release_clcsock) {
release_sock(sk);
smc_clcsock_release(smc);
lock_sock(sk);
}
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}
int smc_close_active(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
struct smc_connection *conn = &smc->conn;
struct sock *sk = &smc->sk;
int old_state;
long timeout;
int rc = 0;
timeout = current->flags & PF_EXITING ?
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
old_state = sk->sk_state;
again:
switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk); /* wake up accept */
if (smc->clcsock && smc->clcsock->sk) {
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
}
smc_close_cleanup_listen(sk);
release_sock(sk);
flush_work(&smc->tcp_listen_work);
lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
goto again;
}
break;
case SMC_APPFINCLOSEWAIT:
/* socket already shutdown wr or both (active close) */
if (txflags->peer_done_writing &&
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
}
sk->sk_state = SMC_CLOSED;
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
sk->sk_state != SMC_APPCLOSEWAIT2)
goto again;
/* confirm close from peer */
rc = smc_close_final(conn);
if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
sock_put(sk); /* postponed passive closing */
} else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
}
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
}
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PEERFINCLOSEWAIT:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
rc = smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
case SMC_CLOSED:
/* nothing to do, add tracing in future patch */
break;
}
if (old_state != sk->sk_state)
sk->sk_state_change(sk);
return rc;
}
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
case SMC_INIT:
case SMC_ACTIVE:
case SMC_APPCLOSEWAIT1:
sk->sk_state = SMC_PROCESSABORT;
sock_put(sk); /* passive closing */
break;
case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
!smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_PROCESSABORT;
else
sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
}
}
/* Either some kind of closing has been received: peer_conn_closed,
* peer_conn_abort, or peer_done_writing
* or the link group of the connection terminates abnormally.
*/
static void smc_close_passive_work(struct work_struct *work)
{
struct smc_connection *conn = container_of(work,
struct smc_connection,
close_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smc_cdc_conn_state_flags *rxflags;
bool release_clcsock = false;
struct sock *sk = &smc->sk;
int old_state;
lock_sock(sk);
old_state = sk->sk_state;
rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
/* peer has not received all data */
smc_close_passive_abort_received(smc);
release_sock(&smc->sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(&smc->sk);
goto wakeup;
}
switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_APPCLOSEWAIT1;
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
/* postpone sock_put() for passive closing to cover
* received SEND_SHUTDOWN as well
*/
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
sk->sk_state = SMC_PEERCLOSEWAIT2;
/* fall through */
/* to check for closing */
case SMC_PEERCLOSEWAIT2:
if (!smc_cdc_rxed_any_close(conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
smc_close_sent_any_close(conn)) {
/* smc_release has already been called locally */
sk->sk_state = SMC_CLOSED;
} else {
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
sock_put(sk); /* passive closing */
break;
case SMC_PEERFINCLOSEWAIT:
if (smc_cdc_rxed_any_close(conn)) {
sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
}
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
/* postpone sock_put() for passive closing to cover
* received SEND_SHUTDOWN as well
*/
break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
case SMC_CLOSED:
/* nothing to do, add tracing in future patch */
break;
}
wakeup:
sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
(sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
smc_conn_free(conn);
if (smc->clcsock)
release_clcsock = true;
}
}
release_sock(sk);
if (release_clcsock)
smc_clcsock_release(smc);
sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
{
struct smc_connection *conn = &smc->conn;
struct sock *sk = &smc->sk;
int old_state;
long timeout;
int rc = 0;
timeout = current->flags & PF_EXITING ?
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
old_state = sk->sk_state;
again:
switch (sk->sk_state) {
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state != SMC_ACTIVE)
goto again;
/* send close wr request */
rc = smc_close_wr(conn);
sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
/* passive close */
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state != SMC_APPCLOSEWAIT1)
goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
case SMC_APPFINCLOSEWAIT:
case SMC_PROCESSABORT:
case SMC_PEERABORTWAIT:
/* nothing to do, add tracing in future patch */
break;
}
if (old_state != sk->sk_state)
sk->sk_state_change(sk);
return rc;
}
/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}