NFSv4: Fix a NFSv4 state manager deadlock

Fix a deadlock whereby the NFSv4 state manager can get stuck in the
delegation return code, waiting for a layout return to complete in
another thread. If the server reboots before that other thread
completes, then we need to be able to start a second state
manager thread in order to perform recovery.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
Trond Myklebust 2018-11-19 20:11:45 -05:00
parent 9ff01193a2
commit aeabb3c961
2 changed files with 13 additions and 5 deletions

View file

@ -41,6 +41,8 @@ enum nfs4_client_state {
NFS4CLNT_MOVED,
NFS4CLNT_LEASE_MOVED,
NFS4CLNT_DELEGATION_EXPIRED,
NFS4CLNT_RUN_MANAGER,
NFS4CLNT_DELEGRETURN_RUNNING,
};
#define NFS4_RENEW_TIMEOUT 0x01

View file

@ -1210,6 +1210,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;
__module_get(THIS_MODULE);
@ -2503,6 +2504,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Ensure exclusive access to NFSv4 state */
do {
clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
section = "purge state";
status = nfs4_purge_lease(clp);
@ -2593,14 +2595,18 @@ static void nfs4_state_manager(struct nfs_client *clp)
}
nfs4_end_drain_session(clp);
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
continue;
nfs4_clear_state_manager_bit(clp);
if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
}
clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
}
nfs4_clear_state_manager_bit(clp);
/* Did we race with an attempt to give us more work? */
if (clp->cl_state == 0)
if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
return;
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;