migration/colo.c: Use event instead of semaphore

If multiple packets miscompare in a short timeframe, the semaphore
value will be increased multiple times. This causes multiple
checkpoints even if one would be sufficient.

Fix this by using a event instead of a semaphore for triggering
checkpoints. Now, checkpoint requests will be ignored until the
checkpoint event is sent to colo-compare (which releases the
miscompared packets).

Benchmark results (iperf3):
Client-to-server tcp:
without patch: ~66 Mbit/s
with patch: ~61 Mbit/s
Server-to-client tcp:
without patch: ~702 Kbit/s
with patch: ~16 Mbit/s

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
Message-Id: <fd601ba1beb524aada54ba66e87ebfc12cf4574b.1589193382.git.lukasstraub2@web.de>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Lukas Straub 2020-05-11 13:10:44 +02:00 committed by Dr. David Alan Gilbert
parent e0d138aa9b
commit bb70b66ed7
2 changed files with 7 additions and 6 deletions

View file

@ -436,6 +436,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
goto out;
}
qemu_event_reset(&s->colo_checkpoint_event);
colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
if (local_err) {
goto out;
@ -589,7 +590,7 @@ static void colo_process_checkpoint(MigrationState *s)
goto out;
}
qemu_sem_wait(&s->colo_checkpoint_sem);
qemu_event_wait(&s->colo_checkpoint_event);
if (s->state != MIGRATION_STATUS_COLO) {
goto out;
@ -637,7 +638,7 @@ out:
colo_compare_unregister_notifier(&packets_compare_notifier);
timer_del(s->colo_delay_timer);
timer_free(s->colo_delay_timer);
qemu_sem_destroy(&s->colo_checkpoint_sem);
qemu_event_destroy(&s->colo_checkpoint_event);
/*
* Must be called after failover BH is completed,
@ -654,7 +655,7 @@ void colo_checkpoint_notify(void *opaque)
MigrationState *s = opaque;
int64_t next_notify_time;
qemu_sem_post(&s->colo_checkpoint_sem);
qemu_event_set(&s->colo_checkpoint_event);
s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
next_notify_time = s->colo_checkpoint_time +
s->parameters.x_checkpoint_delay;
@ -664,7 +665,7 @@ void colo_checkpoint_notify(void *opaque)
void migrate_start_colo_process(MigrationState *s)
{
qemu_mutex_unlock_iothread();
qemu_sem_init(&s->colo_checkpoint_sem, 0);
qemu_event_init(&s->colo_checkpoint_event, false);
s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
colo_checkpoint_notify, s);

View file

@ -215,8 +215,8 @@ struct MigrationState
/* The semaphore is used to notify COLO thread that failover is finished */
QemuSemaphore colo_exit_sem;
/* The semaphore is used to notify COLO thread to do checkpoint */
QemuSemaphore colo_checkpoint_sem;
/* The event is used to notify COLO thread to do checkpoint */
QemuEvent colo_checkpoint_event;
int64_t colo_checkpoint_time;
QEMUTimer *colo_delay_timer;