From d95431624f934fe4740211738fc787808005b14e Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 15 Sep 2023 10:02:32 -0600 Subject: [PATCH] nvme: Give up when we've failed Normally, we poll the device every so often to see if commands have timed out. However, we'll go into the recovery state as part of failing the drive. To account for all possibilities, if we're failed when we get into the polling function, just stop polling: Party is over. Sponsored by: Netflix --- sys/dev/nvme/nvme_qpair.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index b256c4713c8d..4e37aa0e1020 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -1011,6 +1011,17 @@ nvme_qpair_timeout(void *arg) mtx_assert(&qpair->recovery, MA_OWNED); + /* + * If the controller has failed, give up. We're never going to change + * state from a failed controller: no further transactions are possible. + * We go ahead and let the timeout expire in many cases for simplicity. + */ + if (qpair->ctrlr->is_failed) { + nvme_printf(ctrlr, "Controller failed, giving up\n"); + qpair->timer_armed = false; + return; + } + switch (qpair->recovery_state) { case RECOVERY_NONE: /* @@ -1094,8 +1105,8 @@ nvme_qpair_timeout(void *arg) idle = false; /* We want to keep polling */ break; case RECOVERY_WAITING: - nvme_printf(ctrlr, "waiting for reset to complete\n"); - idle = false; /* We want to keep polling */ + nvme_printf(ctrlr, "Waiting for reset to complete\n"); + idle = false; /* We want to keep polling */ break; }