diff --git a/migration/migration.c b/migration/migration.c index d61e572742..3ee1e6b0d6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -153,6 +153,7 @@ void migration_object_init(void) qemu_sem_init(¤t_incoming->postcopy_qemufile_dst_done, 0); qemu_mutex_init(¤t_incoming->page_request_mutex); + qemu_cond_init(¤t_incoming->page_request_cond); current_incoming->page_requested = g_tree_new(page_request_addr_cmp); migration_object_check(current_migration, &error_fatal); @@ -367,7 +368,7 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, * things like g_tree_lookup() will return TRUE (1) when found. */ g_tree_insert(mis->page_requested, aligned, (gpointer)1); - mis->page_requested_count++; + qatomic_inc(&mis->page_requested_count); trace_postcopy_page_req_add(aligned, mis->page_requested_count); } } diff --git a/migration/migration.h b/migration/migration.h index c390500604..cdaa10d515 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -196,7 +196,10 @@ struct MigrationIncomingState { /* A tree of pages that we requested to the source VM */ GTree *page_requested; - /* For debugging purpose only, but would be nice to keep */ + /* + * For postcopy only, count the number of requested page faults that + * still haven't been resolved. + */ int page_requested_count; /* * The mutex helps to maintain the requested pages that we sent to the @@ -210,6 +213,14 @@ struct MigrationIncomingState { * contains valid information. */ QemuMutex page_request_mutex; + /* + * If postcopy preempt is enabled, there is a chance that the main + * thread finished loading its data before the preempt channel has + * finished loading the urgent pages. If that happens, the two threads + * will use this condvar to synchronize, so the main thread will always + * wait until all pages received. + */ + QemuCond page_request_cond; /* * Number of devices that have yet to approve switchover. When this reaches diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 29aea9456d..5408e028c6 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -599,6 +599,30 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) if (mis->preempt_thread_status == PREEMPT_THREAD_CREATED) { /* Notify the fast load thread to quit */ mis->preempt_thread_status = PREEMPT_THREAD_QUIT; + /* + * Update preempt_thread_status before reading count. Note: mutex + * lock only provide ACQUIRE semantic, and it doesn't stops this + * write to be reordered after reading the count. + */ + smp_mb(); + /* + * It's possible that the preempt thread is still handling the last + * pages to arrive which were requested by guest page faults. + * Making sure nothing is left behind by waiting on the condvar if + * that unlikely case happened. + */ + WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { + if (qatomic_read(&mis->page_requested_count)) { + /* + * It is guaranteed to receive a signal later, because the + * count>0 now, so it's destined to be decreased to zero + * very soon by the preempt thread. + */ + qemu_cond_wait(&mis->page_request_cond, + &mis->page_request_mutex); + } + } + /* Notify the fast load thread to quit */ if (mis->postcopy_qemufile_dst) { qemu_file_shutdown(mis->postcopy_qemufile_dst); } @@ -1277,8 +1301,20 @@ static int qemu_ufd_copy_ioctl(MigrationIncomingState *mis, void *host_addr, */ if (g_tree_lookup(mis->page_requested, host_addr)) { g_tree_remove(mis->page_requested, host_addr); - mis->page_requested_count--; + int left_pages = qatomic_dec_fetch(&mis->page_requested_count); + trace_postcopy_page_req_del(host_addr, mis->page_requested_count); + /* Order the update of count and read of preempt status */ + smp_mb(); + if (mis->preempt_thread_status == PREEMPT_THREAD_QUIT && + left_pages == 0) { + /* + * This probably means the main thread is waiting for us. + * Notify that we've finished receiving the last requested + * page. + */ + qemu_cond_signal(&mis->page_request_cond); + } } qemu_mutex_unlock(&mis->page_request_mutex); mark_postcopy_blocktime_end((uintptr_t)host_addr);