mirror of
https://github.com/freebsd/freebsd-src
synced 2024-09-30 13:44:55 +00:00
nvme: Separate total failures from I/O failures
When it's an I/O failure, we can still send admin commands. Separate out
the admin failures and flag them as such so that we can still send admin
commands on half-failed drives.
Fixes: 9229b3105d
(nvme: Fail passthrough commands right away in failed state)
Sponsored by: Netflix
This commit is contained in:
parent
ce7fac64ba
commit
3d89acf590
|
@ -232,7 +232,7 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
|
|||
}
|
||||
|
||||
static void
|
||||
nvme_ctrlr_fail(struct nvme_controller *ctrlr)
|
||||
nvme_ctrlr_fail(struct nvme_controller *ctrlr, bool admin_also)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -242,7 +242,10 @@ nvme_ctrlr_fail(struct nvme_controller *ctrlr)
|
|||
* a different error, though when we fail, that hardly matters).
|
||||
*/
|
||||
ctrlr->is_failed = true;
|
||||
nvme_qpair_fail(&ctrlr->adminq);
|
||||
if (admin_also) {
|
||||
ctrlr->is_failed_admin = true;
|
||||
nvme_qpair_fail(&ctrlr->adminq);
|
||||
}
|
||||
if (ctrlr->ioq != NULL) {
|
||||
for (i = 0; i < ctrlr->num_io_queues; i++) {
|
||||
nvme_qpair_fail(&ctrlr->ioq[i]);
|
||||
|
@ -415,6 +418,7 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
|
|||
|
||||
TSENTER();
|
||||
|
||||
ctrlr->is_failed_admin = true;
|
||||
nvme_ctrlr_disable_qpairs(ctrlr);
|
||||
|
||||
err = nvme_ctrlr_disable(ctrlr);
|
||||
|
@ -423,6 +427,8 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
|
|||
|
||||
err = nvme_ctrlr_enable(ctrlr);
|
||||
out:
|
||||
if (err == 0)
|
||||
ctrlr->is_failed_admin = false;
|
||||
|
||||
TSEXIT();
|
||||
return (err);
|
||||
|
@ -435,11 +441,10 @@ nvme_ctrlr_reset(struct nvme_controller *ctrlr)
|
|||
|
||||
cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1);
|
||||
|
||||
if (cmpset == 0 || ctrlr->is_failed)
|
||||
if (cmpset == 0)
|
||||
/*
|
||||
* Controller is already resetting or has failed. Return
|
||||
* immediately since there is no need to kick off another
|
||||
* reset in these cases.
|
||||
* Controller is already resetting. Return immediately since
|
||||
* there is no need to kick off another reset.
|
||||
*/
|
||||
return;
|
||||
|
||||
|
@ -1090,7 +1095,7 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
|
|||
return;
|
||||
|
||||
if (resetting && nvme_ctrlr_identify(ctrlr) != 0) {
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1105,7 +1110,7 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
|
|||
if (resetting) {
|
||||
old_num_io_queues = ctrlr->num_io_queues;
|
||||
if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1123,12 +1128,12 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
|
|||
nvme_ctrlr_hmb_enable(ctrlr, true, true);
|
||||
|
||||
if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1148,8 +1153,7 @@ nvme_ctrlr_start_config_hook(void *arg)
|
|||
TSENTER();
|
||||
|
||||
if (nvme_ctrlr_hw_reset(ctrlr) != 0) {
|
||||
fail:
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, true);
|
||||
config_intrhook_disestablish(&ctrlr->config_hook);
|
||||
return;
|
||||
}
|
||||
|
@ -1162,13 +1166,15 @@ nvme_ctrlr_start_config_hook(void *arg)
|
|||
nvme_ctrlr_construct_io_qpairs(ctrlr) == 0)
|
||||
nvme_ctrlr_start(ctrlr, false);
|
||||
else
|
||||
goto fail;
|
||||
nvme_ctrlr_fail(ctrlr, false);
|
||||
|
||||
nvme_sysctl_initialize_ctrlr(ctrlr);
|
||||
config_intrhook_disestablish(&ctrlr->config_hook);
|
||||
|
||||
ctrlr->is_initialized = true;
|
||||
nvme_notify_new_controller(ctrlr);
|
||||
if (!ctrlr->is_failed) {
|
||||
ctrlr->is_initialized = true;
|
||||
nvme_notify_new_controller(ctrlr);
|
||||
}
|
||||
TSEXIT();
|
||||
}
|
||||
|
||||
|
@ -1185,7 +1191,7 @@ nvme_ctrlr_reset_task(void *arg, int pending)
|
|||
nvme_ctrlr_start(ctrlr, true);
|
||||
} else {
|
||||
nvme_ctrlr_devctl_log(ctrlr, "RESET", "event=\"timed_out\"");
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, true);
|
||||
}
|
||||
|
||||
atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
|
||||
|
@ -1612,7 +1618,7 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
|
|||
*/
|
||||
gone = (nvme_mmio_read_4(ctrlr, csts) == NVME_GONE);
|
||||
if (gone)
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, true);
|
||||
else
|
||||
nvme_notify_fail_consumers(ctrlr);
|
||||
|
||||
|
@ -1742,7 +1748,9 @@ nvme_ctrlr_suspend(struct nvme_controller *ctrlr)
|
|||
int to = hz;
|
||||
|
||||
/*
|
||||
* Can't touch failed controllers, so it's already suspended.
|
||||
* Can't touch failed controllers, so it's already suspended. User will
|
||||
* need to do an explicit reset to bring it back, if that's even
|
||||
* possible.
|
||||
*/
|
||||
if (ctrlr->is_failed)
|
||||
return (0);
|
||||
|
@ -1809,7 +1817,7 @@ nvme_ctrlr_resume(struct nvme_controller *ctrlr)
|
|||
* itself, due to questionable APIs.
|
||||
*/
|
||||
nvme_printf(ctrlr, "Failed to reset on resume, failing.\n");
|
||||
nvme_ctrlr_fail(ctrlr);
|
||||
nvme_ctrlr_fail(ctrlr, true);
|
||||
(void)atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
|
||||
return (0);
|
||||
}
|
||||
|
|
|
@ -301,6 +301,7 @@ struct nvme_controller {
|
|||
uint32_t notification_sent;
|
||||
|
||||
bool is_failed;
|
||||
bool is_failed_admin;
|
||||
bool is_dying;
|
||||
bool isr_warned;
|
||||
bool is_initialized;
|
||||
|
|
|
@ -1046,6 +1046,7 @@ nvme_qpair_timeout(void *arg)
|
|||
struct nvme_tracker *tr;
|
||||
sbintime_t now;
|
||||
bool idle = true;
|
||||
bool is_admin = qpair == &ctrlr->adminq;
|
||||
bool fast;
|
||||
uint32_t csts;
|
||||
uint8_t cfs;
|
||||
|
@ -1057,9 +1058,10 @@ nvme_qpair_timeout(void *arg)
|
|||
* failure processing that races with the qpair timeout will fail
|
||||
* safely.
|
||||
*/
|
||||
if (qpair->ctrlr->is_failed) {
|
||||
if (is_admin ? qpair->ctrlr->is_failed_admin : qpair->ctrlr->is_failed) {
|
||||
nvme_printf(qpair->ctrlr,
|
||||
"Failed controller, stopping watchdog timeout.\n");
|
||||
"%sFailed controller, stopping watchdog timeout.\n",
|
||||
is_admin ? "Complete " : "");
|
||||
qpair->timer_armed = false;
|
||||
return;
|
||||
}
|
||||
|
@ -1329,6 +1331,7 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
|
|||
{
|
||||
struct nvme_tracker *tr;
|
||||
int err = 0;
|
||||
bool is_admin = qpair == &qpair->ctrlr->adminq;
|
||||
|
||||
mtx_assert(&qpair->lock, MA_OWNED);
|
||||
|
||||
|
@ -1338,12 +1341,14 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
|
|||
/*
|
||||
* The controller has failed, so fail the request. Note, that this races
|
||||
* the recovery / timeout code. Since we hold the qpair lock, we know
|
||||
* it's safe to fail directly. is_failed is set when we fail the controller.
|
||||
* It is only ever reset in the ioctl reset controller path, which is safe
|
||||
* to race (for failed controllers, we make no guarantees about bringing
|
||||
* it out of failed state relative to other commands).
|
||||
* it's safe to fail directly. is_failed is set when we fail the
|
||||
* controller. It is only ever reset in the ioctl reset controller
|
||||
* path, which is safe to race (for failed controllers, we make no
|
||||
* guarantees about bringing it out of failed state relative to other
|
||||
* commands). We try hard to allow admin commands when the entire
|
||||
* controller hasn't failed, only something related to I/O queues.
|
||||
*/
|
||||
if (qpair->ctrlr->is_failed) {
|
||||
if (is_admin ? qpair->ctrlr->is_failed_admin : qpair->ctrlr->is_failed) {
|
||||
nvme_qpair_manual_complete_request(qpair, req,
|
||||
NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 1,
|
||||
ERROR_PRINT_NONE);
|
||||
|
@ -1410,11 +1415,13 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
|
|||
static void
|
||||
nvme_qpair_enable(struct nvme_qpair *qpair)
|
||||
{
|
||||
bool is_admin __unused = qpair == &qpair->ctrlr->adminq;
|
||||
|
||||
if (mtx_initialized(&qpair->recovery))
|
||||
mtx_assert(&qpair->recovery, MA_OWNED);
|
||||
if (mtx_initialized(&qpair->lock))
|
||||
mtx_assert(&qpair->lock, MA_OWNED);
|
||||
KASSERT(!qpair->ctrlr->is_failed,
|
||||
KASSERT(!(is_admin ? qpair->ctrlr->is_failed_admin : qpair->ctrlr->is_failed),
|
||||
("Enabling a failed qpair\n"));
|
||||
|
||||
qpair->recovery_state = RECOVERY_NONE;
|
||||
|
|
|
@ -268,7 +268,6 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb)
|
|||
ccb->ccb_h.status = CAM_REQ_CMP;
|
||||
break;
|
||||
case XPT_NVME_IO: /* Execute the requested I/O operation */
|
||||
case XPT_NVME_ADMIN: /* or Admin operation */
|
||||
if (ctrlr->is_failed) {
|
||||
/*
|
||||
* I/O came in while we were failing the drive, so drop
|
||||
|
@ -279,6 +278,18 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb)
|
|||
}
|
||||
nvme_sim_nvmeio(sim, ccb);
|
||||
return; /* no done */
|
||||
case XPT_NVME_ADMIN: /* or Admin operation */
|
||||
if (ctrlr->is_failed_admin) {
|
||||
/*
|
||||
* Admin request came in when we can't send admin
|
||||
* commands, so drop it. Once failure is complete, we'll
|
||||
* be destroyed.
|
||||
*/
|
||||
ccb->ccb_h.status = CAM_DEV_NOT_THERE;
|
||||
break;
|
||||
}
|
||||
nvme_sim_nvmeio(sim, ccb);
|
||||
return; /* no done */
|
||||
default:
|
||||
ccb->ccb_h.status = CAM_REQ_INVALID;
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue