job: Fix nested aio_poll() hanging in job_txn_apply

All callers have acquired ctx already. Doing that again results in
aio_poll() hang. This fixes the problem that a BDRV_POLL_WHILE() in the
callback cannot make progress because ctx is recursively locked, for
example, when drive-backup finishes.

There are two callers of job_finalize():

    fam@lemon:~/work/qemu [master]$ git grep -w -A1 '^\s*job_finalize'
    blockdev.c:    job_finalize(&job->job, errp);
    blockdev.c-    aio_context_release(aio_context);
    --
    job-qmp.c:    job_finalize(job, errp);
    job-qmp.c-    aio_context_release(aio_context);
    --
    tests/test-blockjob.c:    job_finalize(&job->job, &error_abort);
    tests/test-blockjob.c-    assert(job->job.status == JOB_STATUS_CONCLUDED);

Ignoring the test, it's easy to see both callers to job_finalize (and
job_do_finalize) have acquired the context.

Cc: qemu-stable@nongnu.org
Reported-by: Gu Nini <ngu@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Fam Zheng 2018-08-24 10:43:42 +08:00 committed by Kevin Wolf
parent 6808ae0417
commit 49880165a4

18
job.c
View file

@ -136,21 +136,13 @@ static void job_txn_del_job(Job *job)
}
}
static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock)
static int job_txn_apply(JobTxn *txn, int fn(Job *))
{
AioContext *ctx;
Job *job, *next;
int rc = 0;
QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
if (lock) {
ctx = job->aio_context;
aio_context_acquire(ctx);
}
rc = fn(job);
if (lock) {
aio_context_release(ctx);
}
if (rc) {
break;
}
@ -780,11 +772,11 @@ static void job_do_finalize(Job *job)
assert(job && job->txn);
/* prepare the transaction to complete */
rc = job_txn_apply(job->txn, job_prepare, true);
rc = job_txn_apply(job->txn, job_prepare);
if (rc) {
job_completed_txn_abort(job);
} else {
job_txn_apply(job->txn, job_finalize_single, true);
job_txn_apply(job->txn, job_finalize_single);
}
}
@ -830,10 +822,10 @@ static void job_completed_txn_success(Job *job)
assert(other_job->ret == 0);
}
job_txn_apply(txn, job_transition_to_pending, false);
job_txn_apply(txn, job_transition_to_pending);
/* If no jobs need manual finalization, automatically do so */
if (job_txn_apply(txn, job_needs_finalize, false) == 0) {
if (job_txn_apply(txn, job_needs_finalize) == 0) {
job_do_finalize(job);
}
}