rbd fixes for a -rc1 regression and a subtle deadlock on lock_rwsem

(marked for stable).  Also included a rare WARN condition tweak.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmD67fETHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi1UAB/43vuj0sLO2cAW7HkjvoSqQG6MHruUl
 XaeZCUxG6AdgvrpwFxfi7r2k8N4RegoYFKiqEXdnYl6BANEEcZR1KFB6Uy9vEOuo
 R1NdmBF7ZY2U1o22SpWFHbdoCOx7KEdsFHU5rTODw4dwAZuj3GtRyJ8uGPz7VatH
 0wTLPSIcphFkq5mcdA4hQSes3O4vKmDlVfBreUl+PQg/lxnBPsXx07gLIk3Q0gN1
 uKseGr0miSpDHIS1IjYBOMs8AM5VbJKuzcsy5iCE1z/9tI1J5fsPBrZCopCPjajt
 1yN8/r7F7Ih9HaZoEU4NXLbEbLe4eX9XEWGOmiZjgry66zxwOCr3rJGa
 =Mqd9
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-5.14-rc3' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A subtle deadlock on lock_rwsem (marked for stable) and rbd fixes for
  a -rc1 regression.

  Also included a rare WARN condition tweak"

* tag 'ceph-for-5.14-rc3' of git://github.com/ceph/ceph-client:
  rbd: resurrect setting of disk->private_data in rbd_init_disk()
  ceph: don't WARN if we're still opening a session to an MDS
  rbd: don't hold lock_rwsem while running_list is being drained
  rbd: always kick acquire on "acquired" and "released" notifications
This commit is contained in:
Linus Torvalds 2021-07-23 11:30:12 -07:00
commit 704f4cba43
2 changed files with 14 additions and 21 deletions

View file

@ -4100,8 +4100,6 @@ static void rbd_acquire_lock(struct work_struct *work)
static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
{
bool need_wait;
dout("%s rbd_dev %p\n", __func__, rbd_dev);
lockdep_assert_held_write(&rbd_dev->lock_rwsem);
@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
*/
rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
rbd_assert(!completion_done(&rbd_dev->releasing_wait));
need_wait = !list_empty(&rbd_dev->running_list);
downgrade_write(&rbd_dev->lock_rwsem);
if (need_wait)
wait_for_completion(&rbd_dev->releasing_wait);
up_read(&rbd_dev->lock_rwsem);
if (list_empty(&rbd_dev->running_list))
return true;
up_write(&rbd_dev->lock_rwsem);
wait_for_completion(&rbd_dev->releasing_wait);
down_write(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
@ -4203,15 +4201,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
down_write(&rbd_dev->lock_rwsem);
if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
/*
* we already know that the remote client is
* the owner
*/
up_write(&rbd_dev->lock_rwsem);
return;
dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
__func__, rbd_dev, cid.gid, cid.handle);
} else {
rbd_set_owner_cid(rbd_dev, &cid);
}
rbd_set_owner_cid(rbd_dev, &cid);
downgrade_write(&rbd_dev->lock_rwsem);
} else {
down_read(&rbd_dev->lock_rwsem);
@ -4236,14 +4230,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
down_write(&rbd_dev->lock_rwsem);
if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n",
dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
__func__, rbd_dev, cid.gid, cid.handle,
rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
up_write(&rbd_dev->lock_rwsem);
return;
} else {
rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
}
rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
downgrade_write(&rbd_dev->lock_rwsem);
} else {
down_read(&rbd_dev->lock_rwsem);
@ -4951,6 +4943,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
disk->minors = RBD_MINORS_PER_MAJOR;
}
disk->fops = &rbd_bd_ops;
disk->private_data = rbd_dev;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */

View file

@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s)
break;
case CEPH_MDS_SESSION_CLOSING:
/* Should never reach this when we're unmounting */
WARN_ON_ONCE(true);
WARN_ON_ONCE(s->s_ttl);
fallthrough;
case CEPH_MDS_SESSION_NEW:
case CEPH_MDS_SESSION_RESTARTING: