nvmf: Handle shutdowns more gracefully

If an association is disconnected during a clean shutdown, abort all
pending and future I/O requests with an error to avoid hangs due to
either filesystem unmounts or a stuck GEOM event.

If an association is connected during a clean shutdown, gracefully
disconnect from the remote controller and close the open queues.

Reviewed by:	imp
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D45462
This commit is contained in:
John Baldwin 2024-06-05 12:59:28 -07:00
parent aacaeeee8e
commit f46d4971b5
4 changed files with 114 additions and 7 deletions

View file

@ -8,12 +8,14 @@
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/reboot.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
@ -31,6 +33,8 @@ SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
static void nvmf_disconnect_task(void *arg, int pending);
static void nvmf_shutdown_pre_sync(void *arg, int howto);
static void nvmf_shutdown_post_sync(void *arg, int howto);
void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
@ -528,6 +532,11 @@ nvmf_attach(device_t dev)
goto out;
}
sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_FIRST);
return (0);
out:
if (sc->ns != NULL) {
@ -698,6 +707,62 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
return (error);
}
/*
 * shutdown_pre_sync event handler.
 *
 * 'arg' is the nvmf_softc that was passed at registration time and
 * 'howto' holds the shutdown flags.  The handler does nothing when
 * RB_NOSYNC is set or once the scheduler has been stopped.
 *
 * If the association currently has no admin queue (it is disconnected)
 * and the device is not being detached, every namespace and the CAM SIM
 * are placed in shutdown mode.  Queued and future requests then fail
 * with an error so that filesystems can unmount without hanging.
 */
static void
nvmf_shutdown_pre_sync(void *arg, int howto)
{
	struct nvmf_softc *sc;
	u_int nsidx;

	if ((howto & RB_NOSYNC) != 0)
		return;
	if (SCHEDULER_STOPPED())
		return;

	sc = arg;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL && !sc->detaching) {
		/* Disconnected: fail outstanding I/O instead of queueing it. */
		for (nsidx = 0; nsidx < sc->cdata->nn; nsidx++) {
			if (sc->ns[nsidx] != NULL)
				nvmf_shutdown_ns(sc->ns[nsidx]);
		}
		nvmf_shutdown_sim(sc);
	}
	sx_xunlock(&sc->connection_lock);
}
/*
 * shutdown_post_sync event handler.
 *
 * 'arg' is the nvmf_softc that was passed at registration time and
 * 'howto' holds the shutdown flags.  The handler does nothing when
 * RB_NOSYNC is set or once the scheduler has been stopped.
 *
 * If the association still has an admin queue (it is connected) and the
 * device is not being detached, disconnect gracefully: drain the
 * keep-alive callouts, call nvmf_shutdown_controller(), destroy each I/O
 * queue pair and finally the admin queue pair.  sc->admin is cleared so
 * the association is seen as disconnected afterwards.
 */
static void
nvmf_shutdown_post_sync(void *arg, int howto)
{
	struct nvmf_softc *sc;
	u_int qid;

	if ((howto & RB_NOSYNC) != 0)
		return;
	if (SCHEDULER_STOPPED())
		return;

	sc = arg;

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL && !sc->detaching) {
		/* Drain both keep-alive timers before tearing down queues. */
		callout_drain(&sc->ka_tx_timer);
		callout_drain(&sc->ka_rx_timer);

		nvmf_shutdown_controller(sc);

		/* I/O queues are destroyed first, the admin queue last. */
		for (qid = 0; qid < sc->num_io_queues; qid++)
			nvmf_destroy_qp(sc->io[qid]);
		nvmf_destroy_qp(sc->admin);
		sc->admin = NULL;
	}
	sx_xunlock(&sc->connection_lock);
}
static int
nvmf_detach(device_t dev)
{
@ -710,6 +775,9 @@ nvmf_detach(device_t dev)
sc->detaching = true;
sx_xunlock(&sc->connection_lock);
/*
 * Remove the shutdown handlers installed by nvmf_attach().  Each tag
 * must be deregistered from the event list it was registered on:
 * shutdown_pre_sync_eh from shutdown_pre_sync and shutdown_post_sync_eh
 * from shutdown_post_sync.
 */
EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);
nvmf_destroy_sim(sc);
for (i = 0; i < sc->cdata->nn; i++) {
if (sc->ns[i] != NULL)
@ -1006,9 +1074,6 @@ static device_method_t nvmf_methods[] = {
DEVMETHOD(device_probe, nvmf_probe),
DEVMETHOD(device_attach, nvmf_attach),
DEVMETHOD(device_detach, nvmf_detach),
#if 0
DEVMETHOD(device_shutdown, nvmf_shutdown),
#endif
DEVMETHOD_END
};

View file

@ -29,6 +29,7 @@ struct nvmf_namespace {
u_int flags;
uint32_t lba_size;
bool disconnected;
bool shutdown;
TAILQ_HEAD(, bio) pending_bios;
struct mtx lock;
@ -89,7 +90,7 @@ nvmf_ns_biodone(struct bio *bio)
bio->bio_driver2 = 0;
mtx_lock(&ns->lock);
if (ns->disconnected) {
if (nvmf_fail_disconnect) {
if (nvmf_fail_disconnect || ns->shutdown) {
mtx_unlock(&ns->lock);
bio->bio_error = ECONNABORTED;
bio->bio_flags |= BIO_ERROR;
@ -211,7 +212,7 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
mtx_lock(&ns->lock);
if (ns->disconnected) {
if (nvmf_fail_disconnect) {
if (nvmf_fail_disconnect || ns->shutdown) {
error = ECONNABORTED;
} else {
TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
@ -429,6 +430,28 @@ nvmf_reconnect_ns(struct nvmf_namespace *ns)
}
}
/*
 * Put a namespace into shutdown mode.
 *
 * Sets ns->shutdown and detaches every bio on ns->pending_bios while
 * holding ns->lock, then, with the lock dropped, completes each detached
 * bio with ECONNABORTED, BIO_ERROR set and the residual equal to the
 * full byte count.
 */
void
nvmf_shutdown_ns(struct nvmf_namespace *ns)
{
	TAILQ_HEAD(, bio) aborted;
	struct bio *bp;

	TAILQ_INIT(&aborted);

	/* Move the whole pending list to a private list under the lock. */
	mtx_lock(&ns->lock);
	ns->shutdown = true;
	TAILQ_CONCAT(&aborted, &ns->pending_bios, bio_queue);
	mtx_unlock(&ns->lock);

	/* Fail the detached requests without holding the namespace lock. */
	while ((bp = TAILQ_FIRST(&aborted)) != NULL) {
		TAILQ_REMOVE(&aborted, bp, bio_queue);
		bp->bio_flags |= BIO_ERROR;
		bp->bio_error = ECONNABORTED;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
	}
}
void
nvmf_destroy_ns(struct nvmf_namespace *ns)
{

View file

@ -40,7 +40,10 @@ nvmf_ccb_done(union ccb *ccb)
return;
if (nvmf_cqe_aborted(&ccb->nvmeio.cpl)) {
if (nvmf_fail_disconnect)
struct cam_sim *sim = xpt_path_sim(ccb->ccb_h.path);
struct nvmf_softc *sc = cam_sim_softc(sim);
if (nvmf_fail_disconnect || sc->sim_shutdown)
ccb->ccb_h.status = CAM_DEV_NOT_THERE;
else
ccb->ccb_h.status = CAM_REQUEUE_REQ;
@ -109,7 +112,7 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
mtx_lock(&sc->sim_mtx);
if (sc->sim_disconnected) {
mtx_unlock(&sc->sim_mtx);
if (nvmf_fail_disconnect)
if (nvmf_fail_disconnect || sc->sim_shutdown)
nvmeio->ccb_h.status = CAM_DEV_NOT_THERE;
else
nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
@ -325,6 +328,15 @@ nvmf_reconnect_sim(struct nvmf_softc *sc)
xpt_release_simq(sc->sim, 1);
}
/*
 * Put the CAM SIM into shutdown mode.
 *
 * Sets sc->sim_shutdown under sc->sim_mtx and then calls
 * xpt_release_simq() on the SIM.  Once the flag is set, the I/O paths
 * that test it report CAM_DEV_NOT_THERE for requests hitting a
 * disconnected association instead of CAM_REQUEUE_REQ.
 */
void
nvmf_shutdown_sim(struct nvmf_softc *sc)
{
	/* Publish the flag under the SIM mutex. */
	mtx_lock(&sc->sim_mtx);
	sc->sim_shutdown = true;
	mtx_unlock(&sc->sim_mtx);

	/* Release the SIM queue only after the flag is visible. */
	xpt_release_simq(sc->sim, 1);
}
void
nvmf_destroy_sim(struct nvmf_softc *sc)
{

View file

@ -9,6 +9,7 @@
#define __NVMF_VAR_H__
#include <sys/_callout.h>
#include <sys/_eventhandler.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@ -42,6 +43,7 @@ struct nvmf_softc {
struct cam_path *path;
struct mtx sim_mtx;
bool sim_disconnected;
bool sim_shutdown;
struct nvmf_namespace **ns;
@ -82,6 +84,9 @@ struct nvmf_softc {
u_int num_aer;
struct nvmf_aer *aer;
eventhandler_tag shutdown_pre_sync_eh;
eventhandler_tag shutdown_post_sync_eh;
};
struct nvmf_request {
@ -187,6 +192,7 @@ struct nvmf_namespace *nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
const struct nvme_namespace_data *data);
void nvmf_disconnect_ns(struct nvmf_namespace *ns);
void nvmf_reconnect_ns(struct nvmf_namespace *ns);
void nvmf_shutdown_ns(struct nvmf_namespace *ns);
void nvmf_destroy_ns(struct nvmf_namespace *ns);
bool nvmf_update_ns(struct nvmf_namespace *ns,
const struct nvme_namespace_data *data);
@ -206,6 +212,7 @@ void nvmf_free_request(struct nvmf_request *req);
int nvmf_init_sim(struct nvmf_softc *sc);
void nvmf_disconnect_sim(struct nvmf_softc *sc);
void nvmf_reconnect_sim(struct nvmf_softc *sc);
void nvmf_shutdown_sim(struct nvmf_softc *sc);
void nvmf_destroy_sim(struct nvmf_softc *sc);
void nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id);