diff --git a/migration/migration.c b/migration/migration.c index 41a88fc50a..3dea06d577 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1117,6 +1117,7 @@ bool migration_is_setup_or_active(void) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: case MIGRATION_STATUS_SETUP: case MIGRATION_STATUS_PRE_SWITCHOVER: @@ -1139,6 +1140,7 @@ bool migration_is_running(void) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: case MIGRATION_STATUS_SETUP: case MIGRATION_STATUS_PRE_SWITCHOVER: @@ -1276,6 +1278,7 @@ static void fill_source_migration_info(MigrationInfo *info) case MIGRATION_STATUS_PRE_SWITCHOVER: case MIGRATION_STATUS_DEVICE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: /* TODO add some postcopy stats */ populate_time_info(info, s); @@ -1482,9 +1485,30 @@ static void migrate_error_free(MigrationState *s) static void migrate_fd_error(MigrationState *s, const Error *error) { + MigrationStatus current = s->state; + MigrationStatus next; + assert(s->to_dst_file == NULL); - migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, - MIGRATION_STATUS_FAILED); + + switch (current) { + case MIGRATION_STATUS_SETUP: + next = MIGRATION_STATUS_FAILED; + break; + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: + /* Never fail a postcopy migration; switch back to PAUSED instead */ + next = MIGRATION_STATUS_POSTCOPY_PAUSED; + break; + default: + /* + * This really shouldn't happen. Just be careful to not crash a VM + * just for this. Instead, dump something. + */ + error_report("%s: Illegal migration status (%s) detected", + __func__, MigrationStatus_str(current)); + return; + } + + migrate_set_state(&s->state, current, next); migrate_set_error(s, error); } @@ -1585,6 +1609,7 @@ bool migration_in_postcopy(void) switch (s->state) { case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: return true; default: @@ -1972,6 +1997,9 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp) return false; } + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP); + /* This is a resume, skip init status */ return true; } @@ -3004,9 +3032,9 @@ static MigThrError postcopy_pause(MigrationState *s) * We wait until things fixed up. Then someone will setup the * status back for us. */ - while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + do { qemu_sem_wait(&s->postcopy_pause_sem); - } + } while (postcopy_is_paused(s->state)); if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { /* Woken up by a recover procedure. Give it a shot */ @@ -3702,7 +3730,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) { Error *local_err = NULL; uint64_t rate_limit; - bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; + bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP); int ret; /* @@ -3769,7 +3797,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) if (resume) { /* Wakeup the main migration thread to do the recovery */ - migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP, MIGRATION_STATUS_POSTCOPY_RECOVER); qemu_sem_post(&s->postcopy_pause_sem); return; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 97701e6bb2..1c374b7ea1 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -1770,3 +1770,9 @@ void *postcopy_preempt_thread(void *opaque) return NULL; } + +bool postcopy_is_paused(MigrationStatus status) +{ + return status == MIGRATION_STATUS_POSTCOPY_PAUSED || + status == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP; +} diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index ecae941211..a6df1b2811 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -13,6 +13,8 @@ #ifndef QEMU_POSTCOPY_RAM_H #define QEMU_POSTCOPY_RAM_H +#include "qapi/qapi-types-migration.h" + /* Return true if the host supports everything we need to do postcopy-ram */ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp); @@ -193,5 +195,6 @@ enum PostcopyChannels { void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); void postcopy_preempt_setup(MigrationState *s); int postcopy_preempt_establish_channel(MigrationState *s); +bool postcopy_is_paused(MigrationStatus status); #endif diff --git a/migration/savevm.c b/migration/savevm.c index e71410d8c1..deb57833f8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2864,9 +2864,9 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) error_report("Detected IO failure for postcopy. " "Migration paused."); - while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + do { qemu_sem_wait(&mis->postcopy_pause_sem_dst); - } + } while (postcopy_is_paused(mis->state)); trace_postcopy_pause_incoming_continued(); diff --git a/qapi/migration.json b/qapi/migration.json index de6c8b0444..0f24206bce 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -142,6 +142,9 @@ # # @postcopy-paused: during postcopy but paused. (since 3.0) # +# @postcopy-recover-setup: setup phase for a postcopy recovery process, +# preparing for a recovery phase to start. (since 9.1) +# # @postcopy-recover: trying to recover from a paused postcopy. (since # 3.0) # @@ -166,6 +169,7 @@ { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', 'active', 'postcopy-active', 'postcopy-paused', + 'postcopy-recover-setup', 'postcopy-recover', 'completed', 'failed', 'colo', 'pre-switchover', 'device', 'wait-unplug' ] } ##