Migration bits from the COLO project

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQItBAABCAAXBQJYFc37EBxhbWl0QGtlcm5lbC5vcmcACgkQ6wtN/GV+9nDdqQ/9
 H+di+q/zF6aOEuXRCN0ud9R81fZ7nLve3e9EbKCNZXnxN3M3+zQj0At+e/SBEoTc
 rRwqJmNlRX3TWViWsAYmddgtopZ9R9DWwW/VsKjS2Ng230YShrA/o20hu2dJkwl3
 CN4vAObzc/gxM59NWUlMnTXOG+Z9fI1NlEf0vZ2484a59KPVIE7W1zZccT1F8MNq
 sfa3RlOGBchNO2Rfzrr6cFGGH7UTfWiftPs7EDOfN/YBcpld6V4DfPWdPP8r2DSX
 gG7D3DJuuqPxZCjl0Nm1OjaIunYfVrRpMPMeNyo1+kTVbvvhDPFjah/MSWu8XJ2c
 N5lSqikIAWfMbQVW9gDpQ0495eRQTWA7VIlCbwN6mqNxyBvQMA+licFq6UFFrCMp
 quC3gO+daz3fKvUhi23TpebbqKLHA5OZA5ZvkjGDgkKPMCJyQRBZHLb81t5xsulZ
 cXuzAOeRcXK9aEJvcrDgWwxzi3PN8zg74RF1ZV8gxM4DkHKohlsnbgshWqGkFh3M
 +S5tEPqVlOlDO9juf6rlwNnVbWhFDFEGMKjI9XMTWwVWTREbqyP86fP66h2C4qc6
 34yAHi5G2i43dxzHgpHC0MpU0XenO0EYdu+8Tcx35LSBSfkOeD7pU1DeZQgbI51m
 ZQSnLDJqv2HVdoZT7vaIjwuhtEl84xqelFdVg+cY7Gw=
 =sur7
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.8' into staging

Migration bits from the COLO project

# gpg: Signature made Sun 30 Oct 2016 10:39:55 GMT
# gpg:                using RSA key 0xEB0B4DFC657EF670
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"
# Primary key fingerprint: 48CA 3722 5FE7 F4A8 B337  2735 1E9A 3B5F 8540 83B6
#      Subkey fingerprint: CC63 D332 AB8F 4617 4529  6534 EB0B 4DFC 657E F670

* remotes/amit-migration/tags/migration-for-2.8:
  MAINTAINERS: Add maintainer for COLO framework related files
  configure: Support enable/disable COLO feature
  docs: Add documentation for COLO feature
  COLO: Implement failover work for secondary VM
  COLO: Implement the process of failover for primary VM
  COLO: Introduce state to record failover process
  COLO: Add 'x-colo-lost-heartbeat' command to trigger failover
  COLO: Synchronize PVM's state to SVM periodically
  COLO: Add checkpoint-delay parameter for migrate-set-parameters
  COLO: Load VMState into QIOChannelBuffer before restore it
  COLO: Send PVM state to secondary side when do checkpoint
  COLO: Add a new RunState RUN_STATE_COLO
  COLO: Introduce checkpointing protocol
  COLO: Establish a new communicating path for COLO
  migration: Switch to COLO process after finishing loadvm
  migration: Enter into COLO mode after migration if COLO is enabled
  COLO: migrate COLO related info to secondary node
  migration: Introduce capability 'x-colo' to migration

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2016-10-31 13:06:38 +00:00
commit eab9e9629c
21 changed files with 1279 additions and 21 deletions

View file

@ -1426,6 +1426,14 @@ F: util/uuid.c
F: include/qemu/uuid.h
F: tests/test-uuid.c
COLO Framework
M: zhanghailiang <zhang.zhanghailiang@huawei.com>
S: Maintained
F: migration/colo*
F: include/migration/colo.h
F: include/migration/failover.h
F: docs/COLO-FT.txt
COLO Proxy
M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
M: Li Zhijian <lizhijian@cn.fujitsu.com>

11
configure vendored
View file

@ -230,6 +230,7 @@ vhost_net="no"
vhost_scsi="no"
vhost_vsock="no"
kvm="no"
colo="yes"
rdma=""
gprof="no"
debug_tcg="no"
@ -918,6 +919,10 @@ for opt do
;;
--enable-kvm) kvm="yes"
;;
--disable-colo) colo="no"
;;
--enable-colo) colo="yes"
;;
--disable-tcg-interpreter) tcg_interpreter="no"
;;
--enable-tcg-interpreter) tcg_interpreter="yes"
@ -1366,6 +1371,7 @@ disabled with --disable-FEATURE, default is enabled if available:
fdt fdt device tree
bluez bluez stack connectivity
kvm KVM acceleration support
colo COarse-grain LOck-stepping VM for Non-stop Service
rdma RDMA-based migration support
vde support for vde network
netmap support for netmap network
@ -5004,6 +5010,7 @@ echo "Linux AIO support $linux_aio"
echo "ATTR/XATTR support $attr"
echo "Install blobs $blobs"
echo "KVM support $kvm"
echo "COLO support $colo"
echo "RDMA support $rdma"
echo "TCG interpreter $tcg_interpreter"
echo "fdt support $fdt"
@ -5639,6 +5646,10 @@ if have_backend "syslog"; then
fi
echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
if test "$colo" = "yes"; then
echo "CONFIG_COLO=y" >> $config_host_mak
fi
if test "$rdma" = "yes" ; then
echo "CONFIG_RDMA=y" >> $config_host_mak
fi

189
docs/COLO-FT.txt Normal file
View file

@ -0,0 +1,189 @@
COarse-grained LOck-stepping Virtual Machines for Non-stop Service
----------------------------------------
Copyright (c) 2016 Intel Corporation
Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
Copyright (c) 2016 Fujitsu, Corp.
This work is licensed under the terms of the GNU GPL, version 2 or later.
See the COPYING file in the top-level directory.
This document gives an overview of COLO's design and how to use it.
== Background ==
Virtual machine (VM) replication is a well known technique for providing
application-agnostic software-implemented hardware fault tolerance,
also known as "non-stop service".
COLO (COarse-grained LOck-stepping) is a high availability solution.
Both primary VM (PVM) and secondary VM (SVM) run in parallel. They receive the
same request from client, and generate response in parallel too.
If the response packets from PVM and SVM are identical, they are released
immediately. Otherwise, a VM checkpoint (on demand) is conducted.
== Architecture ==
The architecture of COLO is shown in the diagram below.
It consists of a pair of networked physical nodes:
The primary node running the PVM, and the secondary node running the SVM
to maintain a valid replica of the PVM.
PVM and SVM execute in parallel and generate output of response packets for
client requests according to the application semantics.
The incoming packets from the client or external network are received by the
primary node, and then forwarded to the secondary node, so that both the PVM
and the SVM are stimulated with the same requests.
COLO receives the outbound packets from both the PVM and SVM and compares them
before allowing the output to be sent to clients.
The SVM is qualified as a valid replica of the PVM, as long as it generates
identical responses to all client requests. Once the differences in the outputs
are detected between the PVM and SVM, COLO withholds transmission of the
outbound packets until it has successfully synchronized the PVM state to the SVM.
Primary Node Secondary Node
+------------+ +-----------------------+ +------------------------+ +------------+
| | | HeartBeat |<----->| HeartBeat | | |
| Primary VM | +-----------|-----------+ +-----------|------------+ |Secondary VM|
| | | | | |
| | +-----------|-----------+ +-----------|------------+ | |
| | |QEMU +---v----+ | |QEMU +----v---+ | | |
| | | |Failover| | | |Failover| | | |
| | | +--------+ | | +--------+ | | |
| | | +---------------+ | | +---------------+ | | |
| | | | VM Checkpoint |-------------->| VM Checkpoint | | | |
| | | +---------------+ | | +---------------+ | | |
| | | | | | | |
|Requests<---------------------------^------------------------------------------>Requests|
|Responses----------------------\ /--|--------------\ /------------------------Responses|
| | | | | | | | | | | | |
| | | +-----------+ | | | | | | | +------------+ | | |
| | | | COLO disk | | | | | | | | | COLO disk | | | |
| | | | Manager |-|-|--|--------------|--|->| Manager | | | |
| | | +|----------+ | | | | | | | +-----------|+ | | |
| | | | | | | | | | | | | | |
+------------+ +--|------------|-|--|--+ +---|--|--------------|--+ +------------+
| | | | | | |
+-------------+ | +----------v-v--|--+ +---|--v-----------+ | +-------------+
| VM Monitor | | | COLO Proxy | | COLO Proxy | | | VM Monitor |
| | | |(compare packet) | | (adjust sequence)| | | |
+-------------+ | +----------|----^--+ +------------------+ | +-------------+
| | | |
+------------------|------------|----|--+ +---------------------|------------------+
| Kernel | | | | | Kernel | |
+------------------|------------|----|--+ +---------------------|------------------+
| | | |
+--------------v+ +--------v----|--+ +------------------+ +v-------------+
| Storage | |External Network| | External Network | | Storage |
+---------------+ +----------------+ +------------------+ +--------------+
== Components introduction ==
You can see there are several components in COLO's diagram of architecture.
Their functions are described below.
HeartBeat:
Runs on both the primary and secondary nodes, to periodically check platform
availability. When the primary node suffers a hardware fail-stop failure,
the heartbeat stops responding, the secondary node will trigger a failover
as soon as it determines the absence.
COLO disk Manager:
When primary VM writes data into image, the colo disk manger captures this data
and sends it to secondary VM's which makes sure the context of secondary VM's
image is consistent with the context of primary VM 's image.
For more details, please refer to docs/block-replication.txt.
Checkpoint/Failover Controller:
Modifications of save/restore flow to realize continuous migration,
to make sure the state of VM in Secondary side is always consistent with VM in
Primary side.
COLO Proxy:
Delivers packets to Primary and Seconday, and then compare the responses from
both side. Then decide whether to start a checkpoint according to some rules.
Please refer to docs/colo-proxy.txt for more informations.
Note:
HeartBeat has not been implemented yet, so you need to trigger failover process
by using 'x-colo-lost-heartbeat' command.
== Test procedure ==
1. Startup qemu
Primary:
# qemu-kvm -enable-kvm -m 2048 -smp 2 -qmp stdio -vnc :7 -name primary \
-device piix3-usb-uhci \
-device usb-tablet -netdev tap,id=hn0,vhost=off \
-device virtio-net-pci,id=net-pci0,netdev=hn0 \
-drive if=virtio,id=primary-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
children.0.file.filename=1.raw,\
children.0.driver=raw -S
Secondary:
# qemu-kvm -enable-kvm -m 2048 -smp 2 -qmp stdio -vnc :7 -name secondary \
-device piix3-usb-uhci \
-device usb-tablet -netdev tap,id=hn0,vhost=off \
-device virtio-net-pci,id=net-pci0,netdev=hn0 \
-drive if=none,id=secondary-disk0,file.filename=1.raw,driver=raw,node-name=node0 \
-drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\
file.driver=qcow2,top-id=active-disk0,\
file.file.filename=/mnt/ramfs/active_disk.img,\
file.backing.driver=qcow2,\
file.backing.file.filename=/mnt/ramfs/hidden_disk.img,\
file.backing.backing=secondary-disk0 \
-incoming tcp:0:8888
2. On Secondary VM's QEMU monitor, issue command
{'execute':'qmp_capabilities'}
{ 'execute': 'nbd-server-start',
'arguments': {'addr': {'type': 'inet', 'data': {'host': 'xx.xx.xx.xx', 'port': '8889'} } }
}
{'execute': 'nbd-server-add', 'arguments': {'device': 'secondeary-disk0', 'writable': true } }
Note:
a. The qmp command nbd-server-start and nbd-server-add must be run
before running the qmp command migrate on primary QEMU
b. Active disk, hidden disk and nbd target's length should be the
same.
c. It is better to put active disk and hidden disk in ramdisk.
3. On Primary VM's QEMU monitor, issue command:
{'execute':'qmp_capabilities'}
{ 'execute': 'human-monitor-command',
'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xx.xx.xx.xx,file.port=8889,file.export=secondary-disk0,node-name=nbd_client0'}}
{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'primary-disk0', 'node': 'nbd_client0' } }
{ 'execute': 'migrate-set-capabilities',
'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:xx.xx.xx.xx:8888' } }
Note:
a. There should be only one NBD Client for each primary disk.
b. xx.xx.xx.xx is the secondary physical machine's hostname or IP
c. The qmp command line must be run after running qmp command line in
secondary qemu.
4. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
You can issue command '{ "execute": "migrate-set-parameters" , "arguments":{ "x-checkpoint-delay": 2000 } }'
to change the checkpoint period time
5. Failover test
You can kill Primary VM and run 'x_colo_lost_heartbeat' in Secondary VM's
monitor at the same time, then SVM will failover and client will not detect this
change.
Before issuing '{ "execute": "x-colo-lost-heartbeat" }' command, we have to
issue block related command to stop block replication.
Primary:
Remove the nbd child from the quorum:
{ 'execute': 'x-blockdev-change', 'arguments': {'parent': 'colo-disk0', 'child': 'children.1'}}
{ 'execute': 'human-monitor-command','arguments': {'command-line': 'drive_del blk-buddy0'}}
Note: there is no qmp command to remove the blockdev now
Secondary:
The primary host is down, so we should do the following thing:
{ 'execute': 'nbd-server-stop' }
== TODO ==
1. Support continuous VM replication.
2. Support shared storage.
3. Develop the heartbeat part.
4. Reduce checkpoint VMs downtime while doing checkpoint.

View file

@ -554,6 +554,16 @@ Example:
-> { "execute": "migrate_set_downtime", "arguments": { "value": 0.1 } }
<- { "return": {} }
x-colo-lost-heartbeat
--------------------
Tell COLO that heartbeat is lost, a failover or takeover is needed.
Example:
-> { "execute": "x-colo-lost-heartbeat" }
<- { "return": {} }
client_migrate_info
-------------------
@ -2861,6 +2871,7 @@ Enable/Disable migration capabilities
- "compress": use multiple compression threads to accelerate live migration
- "events": generate events for each migration state change
- "postcopy-ram": postcopy mode for live migration
- "x-colo": COarse-Grain LOck Stepping (COLO) for Non-stop Service
Arguments:
@ -2882,6 +2893,7 @@ Query current migration capabilities
- "compress": Multiple compression threads state (json-bool)
- "events": Migration state change event state (json-bool)
- "postcopy-ram": postcopy ram state (json-bool)
- "x-colo": COarse-Grain LOck Stepping for Non-stop Service (json-bool)
Arguments:
@ -2895,7 +2907,8 @@ Example:
{"state": false, "capability": "zero-blocks"},
{"state": false, "capability": "compress"},
{"state": true, "capability": "events"},
{"state": false, "capability": "postcopy-ram"}
{"state": false, "capability": "postcopy-ram"},
{"state": false, "capability": "x-colo"}
]}
migrate-set-parameters
@ -2913,6 +2926,8 @@ Set migration parameters
- "max-bandwidth": set maximum speed for migrations (in bytes/sec) (json-int)
- "downtime-limit": set maximum tolerated downtime (in milliseconds) for
migrations (json-int)
- "x-checkpoint-delay": set the delay time for periodic checkpoint (json-int)
Arguments:
Example:

View file

@ -1037,6 +1037,21 @@ STEXI
@findex migrate_start_postcopy
Switch in-progress migration to postcopy mode. Ignored after the end of
migration (or once already in postcopy).
ETEXI
{
.name = "x_colo_lost_heartbeat",
.args_type = "",
.params = "",
.help = "Tell COLO that heartbeat is lost,\n\t\t\t"
"a failover or takeover is needed.",
.cmd = hmp_x_colo_lost_heartbeat,
},
STEXI
@item x_colo_lost_heartbeat
@findex x_colo_lost_heartbeat
Tell COLO that heartbeat is lost, a failover or takeover is needed.
ETEXI
{

16
hmp.c
View file

@ -318,6 +318,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, " %s: %" PRId64 " milliseconds",
MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT],
params->downtime_limit);
monitor_printf(mon, " %s: %" PRId64,
MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
params->x_checkpoint_delay);
monitor_printf(mon, "\n");
}
@ -1386,6 +1389,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p.has_downtime_limit = true;
use_int_value = true;
break;
case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY:
p.has_x_checkpoint_delay = true;
use_int_value = true;
break;
}
if (use_int_value) {
@ -1402,6 +1409,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p.cpu_throttle_initial = valueint;
p.cpu_throttle_increment = valueint;
p.downtime_limit = valueint;
p.x_checkpoint_delay = valueint;
}
qmp_migrate_set_parameters(&p, &err);
@ -1443,6 +1451,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, &err);
}
void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
qmp_x_colo_lost_heartbeat(&err);
hmp_handle_error(mon, &err);
}
void hmp_set_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");

1
hmp.h
View file

@ -72,6 +72,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
void hmp_set_password(Monitor *mon, const QDict *qdict);
void hmp_expire_password(Monitor *mon, const QDict *qdict);
void hmp_eject(Monitor *mon, const QDict *qdict);

38
include/migration/colo.h Normal file
View file

@ -0,0 +1,38 @@
/*
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
* (a.k.a. Fault Tolerance or Continuous Replication)
*
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
* Copyright (c) 2016 FUJITSU LIMITED
* Copyright (c) 2016 Intel Corporation
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#ifndef QEMU_COLO_H
#define QEMU_COLO_H
#include "qemu-common.h"
#include "migration/migration.h"
#include "qemu/coroutine_int.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
bool colo_supported(void);
void colo_info_init(void);
void migrate_start_colo_process(MigrationState *s);
bool migration_in_colo_state(void);
/* loadvm */
bool migration_incoming_enable_colo(void);
void migration_incoming_exit_colo(void);
void *colo_process_incoming_thread(void *opaque);
bool migration_incoming_in_colo_state(void);
COLOMode get_colo_mode(void);
/* failover */
void colo_do_failover(MigrationState *s);
#endif

View file

@ -0,0 +1,26 @@
/*
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
* (a.k.a. Fault Tolerance or Continuous Replication)
*
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD.
* Copyright (c) 2016 FUJITSU LIMITED
* Copyright (c) 2016 Intel Corporation
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#ifndef QEMU_FAILOVER_H
#define QEMU_FAILOVER_H
#include "qemu-common.h"
#include "qapi-types.h"
void failover_init_state(void);
FailoverStatus failover_set_state(FailoverStatus old_state,
FailoverStatus new_state);
FailoverStatus failover_get_state(void);
void failover_request_active(Error **errp);
bool failover_request_is_active(void);
#endif

View file

@ -21,6 +21,7 @@
#include "migration/vmstate.h"
#include "qapi-types.h"
#include "exec/cpu-common.h"
#include "qemu/coroutine_int.h"
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_VM_FILE_VERSION_COMPAT 0x00000002
@ -107,6 +108,12 @@ struct MigrationIncomingState {
QEMUBH *bh;
int state;
bool have_colo_incoming_thread;
QemuThread colo_incoming_thread;
/* The coroutine we should enter (back) after failover */
Coroutine *migration_incoming_co;
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
};
@ -298,6 +305,7 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
int migrate_use_xbzrle(void);
int64_t migrate_xbzrle_cache_size(void);
bool migrate_colo_enabled(void);
int64_t xbzrle_cache_resize(int64_t new_size);

View file

@ -1,5 +1,7 @@
common-obj-y += migration.o socket.o fd.o exec.o
common-obj-y += tls.o
common-obj-y += colo-comm.o
common-obj-$(CONFIG_COLO) += colo.o colo-failover.o
common-obj-y += vmstate.o
common-obj-y += qemu-file.o
common-obj-y += qemu-file-channel.o

72
migration/colo-comm.c Normal file
View file

@ -0,0 +1,72 @@
/*
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
* (a.k.a. Fault Tolerance or Continuous Replication)
*
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
* Copyright (c) 2016 FUJITSU LIMITED
* Copyright (c) 2016 Intel Corporation
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include <migration/colo.h>
#include "trace.h"
typedef struct {
bool colo_requested;
} COLOInfo;
static COLOInfo colo_info;
COLOMode get_colo_mode(void)
{
if (migration_in_colo_state()) {
return COLO_MODE_PRIMARY;
} else if (migration_incoming_in_colo_state()) {
return COLO_MODE_SECONDARY;
} else {
return COLO_MODE_UNKNOWN;
}
}
static void colo_info_pre_save(void *opaque)
{
COLOInfo *s = opaque;
s->colo_requested = migrate_colo_enabled();
}
static bool colo_info_need(void *opaque)
{
return migrate_colo_enabled();
}
static const VMStateDescription colo_state = {
.name = "COLOState",
.version_id = 1,
.minimum_version_id = 1,
.pre_save = colo_info_pre_save,
.needed = colo_info_need,
.fields = (VMStateField[]) {
VMSTATE_BOOL(colo_requested, COLOInfo),
VMSTATE_END_OF_LIST()
},
};
void colo_info_init(void)
{
vmstate_register(NULL, 0, &colo_state, &colo_info);
}
bool migration_incoming_enable_colo(void)
{
return colo_info.colo_requested;
}
void migration_incoming_exit_colo(void)
{
colo_info.colo_requested = false;
}

83
migration/colo-failover.c Normal file
View file

@ -0,0 +1,83 @@
/*
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
* (a.k.a. Fault Tolerance or Continuous Replication)
*
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
* Copyright (c) 2016 FUJITSU LIMITED
* Copyright (c) 2016 Intel Corporation
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "migration/colo.h"
#include "migration/failover.h"
#include "qmp-commands.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "trace.h"
static QEMUBH *failover_bh;
static FailoverStatus failover_state;
static void colo_failover_bh(void *opaque)
{
int old_state;
qemu_bh_delete(failover_bh);
failover_bh = NULL;
old_state = failover_set_state(FAILOVER_STATUS_REQUIRE,
FAILOVER_STATUS_ACTIVE);
if (old_state != FAILOVER_STATUS_REQUIRE) {
error_report("Unknown error for failover, old_state = %s",
FailoverStatus_lookup[old_state]);
return;
}
colo_do_failover(NULL);
}
void failover_request_active(Error **errp)
{
if (failover_set_state(FAILOVER_STATUS_NONE,
FAILOVER_STATUS_REQUIRE) != FAILOVER_STATUS_NONE) {
error_setg(errp, "COLO failover is already actived");
return;
}
failover_bh = qemu_bh_new(colo_failover_bh, NULL);
qemu_bh_schedule(failover_bh);
}
void failover_init_state(void)
{
failover_state = FAILOVER_STATUS_NONE;
}
FailoverStatus failover_set_state(FailoverStatus old_state,
FailoverStatus new_state)
{
FailoverStatus old;
old = atomic_cmpxchg(&failover_state, old_state, new_state);
if (old == old_state) {
trace_colo_failover_set_state(FailoverStatus_lookup[new_state]);
}
return old;
}
FailoverStatus failover_get_state(void)
{
return atomic_read(&failover_state);
}
void qmp_x_colo_lost_heartbeat(Error **errp)
{
if (get_colo_mode() == COLO_MODE_UNKNOWN) {
error_setg(errp, QERR_FEATURE_DISABLED, "colo");
return;
}
failover_request_active(errp);
}

529
migration/colo.c Normal file
View file

@ -0,0 +1,529 @@
/*
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
* (a.k.a. Fault Tolerance or Continuous Replication)
*
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
* Copyright (c) 2016 FUJITSU LIMITED
* Copyright (c) 2016 Intel Corporation
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "migration/colo.h"
#include "io/channel-buffer.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "migration/failover.h"
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
bool colo_supported(void)
{
return true;
}
bool migration_in_colo_state(void)
{
MigrationState *s = migrate_get_current();
return (s->state == MIGRATION_STATUS_COLO);
}
bool migration_incoming_in_colo_state(void)
{
MigrationIncomingState *mis = migration_incoming_get_current();
return mis && (mis->state == MIGRATION_STATUS_COLO);
}
static bool colo_runstate_is_stopped(void)
{
return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
}
static void secondary_vm_do_failover(void)
{
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
if (!autostart) {
error_report("\"-S\" qemu option will be ignored in secondary side");
/* recover runstate to normal migration finish state */
autostart = true;
}
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
if (old_state != FAILOVER_STATUS_ACTIVE) {
error_report("Incorrect state (%s) while doing failover for "
"secondary VM", FailoverStatus_lookup[old_state]);
return;
}
/* For Secondary VM, jump to incoming co */
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
}
}
static void primary_vm_do_failover(void)
{
MigrationState *s = migrate_get_current();
int old_state;
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
if (old_state != FAILOVER_STATUS_ACTIVE) {
error_report("Incorrect state (%s) while doing failover for Primary VM",
FailoverStatus_lookup[old_state]);
return;
}
}
void colo_do_failover(MigrationState *s)
{
/* Make sure VM stopped while failover happened. */
if (!colo_runstate_is_stopped()) {
vm_stop_force_state(RUN_STATE_COLO);
}
if (get_colo_mode() == COLO_MODE_PRIMARY) {
primary_vm_do_failover();
} else {
secondary_vm_do_failover();
}
}
static void colo_send_message(QEMUFile *f, COLOMessage msg,
Error **errp)
{
int ret;
if (msg >= COLO_MESSAGE__MAX) {
error_setg(errp, "%s: Invalid message", __func__);
return;
}
qemu_put_be32(f, msg);
qemu_fflush(f);
ret = qemu_file_get_error(f);
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't send COLO message");
}
trace_colo_send_message(COLOMessage_lookup[msg]);
}
static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
uint64_t value, Error **errp)
{
Error *local_err = NULL;
int ret;
colo_send_message(f, msg, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
qemu_put_be64(f, value);
qemu_fflush(f);
ret = qemu_file_get_error(f);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to send value for message:%s",
COLOMessage_lookup[msg]);
}
}
static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
{
COLOMessage msg;
int ret;
msg = qemu_get_be32(f);
ret = qemu_file_get_error(f);
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't receive COLO message");
return msg;
}
if (msg >= COLO_MESSAGE__MAX) {
error_setg(errp, "%s: Invalid message", __func__);
return msg;
}
trace_colo_receive_message(COLOMessage_lookup[msg]);
return msg;
}
static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
Error **errp)
{
COLOMessage msg;
Error *local_err = NULL;
msg = colo_receive_message(f, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
if (msg != expect_msg) {
error_setg(errp, "Unexpected COLO message %d, expected %d",
msg, expect_msg);
}
}
static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
Error **errp)
{
Error *local_err = NULL;
uint64_t value;
int ret;
colo_receive_check_message(f, expect_msg, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return 0;
}
value = qemu_get_be64(f);
ret = qemu_file_get_error(f);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
COLOMessage_lookup[expect_msg]);
}
return value;
}
static int colo_do_checkpoint_transaction(MigrationState *s,
QIOChannelBuffer *bioc,
QEMUFile *fb)
{
Error *local_err = NULL;
int ret = -1;
colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
&local_err);
if (local_err) {
goto out;
}
colo_receive_check_message(s->rp_state.from_dst_file,
COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
if (local_err) {
goto out;
}
/* Reset channel-buffer directly */
qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
bioc->usage = 0;
qemu_mutex_lock_iothread();
if (failover_get_state() != FAILOVER_STATUS_NONE) {
qemu_mutex_unlock_iothread();
goto out;
}
vm_stop_force_state(RUN_STATE_COLO);
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("run", "stop");
/*
* Failover request bh could be called after vm_stop_force_state(),
* So we need check failover_request_is_active() again.
*/
if (failover_get_state() != FAILOVER_STATUS_NONE) {
goto out;
}
/* Disable block migration */
s->params.blk = 0;
s->params.shared = 0;
qemu_savevm_state_header(fb);
qemu_savevm_state_begin(fb, &s->params);
qemu_mutex_lock_iothread();
qemu_savevm_state_complete_precopy(fb, false);
qemu_mutex_unlock_iothread();
qemu_fflush(fb);
colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
if (local_err) {
goto out;
}
/*
* We need the size of the VMstate data in Secondary side,
* With which we can decide how much data should be read.
*/
colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
bioc->usage, &local_err);
if (local_err) {
goto out;
}
qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
qemu_fflush(s->to_dst_file);
ret = qemu_file_get_error(s->to_dst_file);
if (ret < 0) {
goto out;
}
colo_receive_check_message(s->rp_state.from_dst_file,
COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
if (local_err) {
goto out;
}
colo_receive_check_message(s->rp_state.from_dst_file,
COLO_MESSAGE_VMSTATE_LOADED, &local_err);
if (local_err) {
goto out;
}
ret = 0;
qemu_mutex_lock_iothread();
vm_start();
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
out:
if (local_err) {
error_report_err(local_err);
}
return ret;
}
static void colo_process_checkpoint(MigrationState *s)
{
QIOChannelBuffer *bioc;
QEMUFile *fb = NULL;
int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
Error *local_err = NULL;
int ret;
failover_init_state();
s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
if (!s->rp_state.from_dst_file) {
error_report("Open QEMUFile from_dst_file failed");
goto out;
}
/*
* Wait for Secondary finish loading VM states and enter COLO
* restore.
*/
colo_receive_check_message(s->rp_state.from_dst_file,
COLO_MESSAGE_CHECKPOINT_READY, &local_err);
if (local_err) {
goto out;
}
bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
qemu_mutex_lock_iothread();
vm_start();
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
while (s->state == MIGRATION_STATUS_COLO) {
if (failover_get_state() != FAILOVER_STATUS_NONE) {
error_report("failover request");
goto out;
}
current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
if (current_time - checkpoint_time <
s->parameters.x_checkpoint_delay) {
int64_t delay_ms;
delay_ms = s->parameters.x_checkpoint_delay -
(current_time - checkpoint_time);
g_usleep(delay_ms * 1000);
}
ret = colo_do_checkpoint_transaction(s, bioc, fb);
if (ret < 0) {
goto out;
}
checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
}
out:
/* Throw the unreported error message after exited from loop */
if (local_err) {
error_report_err(local_err);
}
if (fb) {
qemu_fclose(fb);
}
if (s->rp_state.from_dst_file) {
qemu_fclose(s->rp_state.from_dst_file);
}
}
void migrate_start_colo_process(MigrationState *s)
{
qemu_mutex_unlock_iothread();
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
colo_process_checkpoint(s);
qemu_mutex_lock_iothread();
}
static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
Error **errp)
{
COLOMessage msg;
Error *local_err = NULL;
msg = colo_receive_message(f, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
switch (msg) {
case COLO_MESSAGE_CHECKPOINT_REQUEST:
*checkpoint_request = 1;
break;
default:
*checkpoint_request = 0;
error_setg(errp, "Got unknown COLO message: %d", msg);
break;
}
}
void *colo_process_incoming_thread(void *opaque)
{
MigrationIncomingState *mis = opaque;
QEMUFile *fb = NULL;
QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
uint64_t total_size;
uint64_t value;
Error *local_err = NULL;
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
failover_init_state();
mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
if (!mis->to_src_file) {
error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
goto out;
}
/*
* Note: the communication between Primary side and Secondary side
* should be sequential, we set the fd to unblocked in migration incoming
* coroutine, and here we are in the COLO incoming thread, so it is ok to
* set the fd back to blocked.
*/
qemu_file_set_blocking(mis->from_src_file, true);
bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
&local_err);
if (local_err) {
goto out;
}
while (mis->state == MIGRATION_STATUS_COLO) {
int request;
colo_wait_handle_message(mis->from_src_file, &request, &local_err);
if (local_err) {
goto out;
}
assert(request);
if (failover_get_state() != FAILOVER_STATUS_NONE) {
error_report("failover request");
goto out;
}
/* FIXME: This is unnecessary for periodic checkpoint mode */
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
&local_err);
if (local_err) {
goto out;
}
colo_receive_check_message(mis->from_src_file,
COLO_MESSAGE_VMSTATE_SEND, &local_err);
if (local_err) {
goto out;
}
value = colo_receive_message_value(mis->from_src_file,
COLO_MESSAGE_VMSTATE_SIZE, &local_err);
if (local_err) {
goto out;
}
/*
* Read VM device state data into channel buffer,
* It's better to re-use the memory allocated.
* Here we need to handle the channel buffer directly.
*/
if (value > bioc->capacity) {
bioc->capacity = value;
bioc->data = g_realloc(bioc->data, bioc->capacity);
}
total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
if (total_size != value) {
error_report("Got %" PRIu64 " VMState data, less than expected"
" %" PRIu64, total_size, value);
goto out;
}
bioc->usage = total_size;
qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
&local_err);
if (local_err) {
goto out;
}
qemu_mutex_lock_iothread();
qemu_system_reset(VMRESET_SILENT);
if (qemu_loadvm_state(fb) < 0) {
error_report("COLO: loadvm failed");
qemu_mutex_unlock_iothread();
goto out;
}
qemu_mutex_unlock_iothread();
colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
&local_err);
if (local_err) {
goto out;
}
}
out:
/* Throw the unreported error message after exited from loop */
if (local_err) {
error_report_err(local_err);
}
if (fb) {
qemu_fclose(fb);
}
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}
migration_incoming_exit_colo();
return NULL;
}

View file

@ -36,6 +36,7 @@
#include "exec/address-spaces.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
@ -62,6 +63,11 @@
/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
/* The delay time (in ms) between two COLO checkpoints
* Note: Please change this default value to 10000 when we support hybrid mode.
*/
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@ -94,6 +100,7 @@ MigrationState *migrate_get_current(void)
.cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
.max_bandwidth = MAX_THROTTLE,
.downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME,
.x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY,
},
};
@ -406,6 +413,18 @@ static void process_incoming_migration_co(void *opaque)
/* Else if something went wrong then just fall out of the normal exit */
}
/* we get COLO info, and know if we are in COLO mode */
if (!ret && migration_incoming_enable_colo()) {
mis->migration_incoming_co = qemu_coroutine_self();
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
mis->have_colo_incoming_thread = true;
qemu_coroutine_yield();
/* Wait checkpoint incoming thread exit before free resource */
qemu_thread_join(&mis->colo_incoming_thread);
}
qemu_fclose(f);
free_xbzrle_decoded_buf();
@ -531,6 +550,9 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
caps = NULL; /* silence compiler warning */
for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
if (i == MIGRATION_CAPABILITY_X_COLO && !colo_supported()) {
continue;
}
if (head == NULL) {
head = g_malloc0(sizeof(*caps));
caps = head;
@ -571,6 +593,7 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->max_bandwidth = s->parameters.max_bandwidth;
params->has_downtime_limit = true;
params->downtime_limit = s->parameters.downtime_limit;
params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
return params;
}
@ -691,6 +714,10 @@ MigrationInfo *qmp_query_migrate(Error **errp)
get_xbzrle_cache_stats(info);
break;
case MIGRATION_STATUS_COLO:
info->has_status = true;
/* TODO: display COLO specific information (checkpoint info etc.) */
break;
case MIGRATION_STATUS_COMPLETED:
get_xbzrle_cache_stats(info);
@ -733,6 +760,14 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
}
for (cap = params; cap; cap = cap->next) {
if (cap->value->capability == MIGRATION_CAPABILITY_X_COLO) {
if (!colo_supported()) {
error_setg(errp, "COLO is not currently supported, please"
" configure with --enable-colo option in order to"
" support COLO feature");
continue;
}
}
s->enabled_capabilities[cap->value->capability] = cap->value->state;
}
@ -817,6 +852,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
"an integer in the range of 0 to 2000000 milliseconds");
return;
}
if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"x_checkpoint_delay",
"is invalid, it should be positive");
}
if (params->has_compress_level) {
s->parameters.compress_level = params->compress_level;
@ -851,6 +891,10 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
if (params->has_downtime_limit) {
s->parameters.downtime_limit = params->downtime_limit;
}
if (params->has_x_checkpoint_delay) {
s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
}
}
@ -1101,7 +1145,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
params.shared = has_inc && inc;
if (migration_is_setup_or_active(s->state) ||
s->state == MIGRATION_STATUS_CANCELLING) {
s->state == MIGRATION_STATUS_CANCELLING ||
s->state == MIGRATION_STATUS_COLO) {
error_setg(errp, QERR_MIGRATION_ACTIVE);
return;
}
@ -1649,7 +1694,11 @@ static void migration_completion(MigrationState *s, int current_active_state,
if (!ret) {
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
if (ret >= 0) {
/*
* Don't mark the image with BDRV_O_INACTIVE flag if
* we will go into COLO stage later.
*/
if (ret >= 0 && !migrate_colo_enabled()) {
ret = bdrv_inactivate_all();
}
if (ret >= 0) {
@ -1691,8 +1740,11 @@ static void migration_completion(MigrationState *s, int current_active_state,
goto fail_invalidate;
}
migrate_set_state(&s->state, current_active_state,
MIGRATION_STATUS_COMPLETED);
if (!migrate_colo_enabled()) {
migrate_set_state(&s->state, current_active_state,
MIGRATION_STATUS_COMPLETED);
}
return;
fail_invalidate:
@ -1713,6 +1765,12 @@ fail:
MIGRATION_STATUS_FAILED);
}
bool migrate_colo_enabled(void)
{
MigrationState *s = migrate_get_current();
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
}
/*
* Master migration thread on the source VM.
* It drives the migration and pumps the data down the outgoing channel.
@ -1731,6 +1789,7 @@ static void *migration_thread(void *opaque)
bool entered_postcopy = false;
/* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
bool enable_colo = migrate_colo_enabled();
rcu_register_thread();
@ -1839,7 +1898,13 @@ static void *migration_thread(void *opaque)
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
qemu_mutex_lock_iothread();
qemu_savevm_state_cleanup();
/*
* The resource has been allocated by migration will be reused in COLO
* process, so don't release them.
*/
if (!enable_colo) {
qemu_savevm_state_cleanup();
}
if (s->state == MIGRATION_STATUS_COMPLETED) {
uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
s->total_time = end_time - s->total_time;
@ -1852,6 +1917,15 @@ static void *migration_thread(void *opaque)
}
runstate_set(RUN_STATE_POSTMIGRATE);
} else {
if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
migrate_start_colo_process(s);
qemu_savevm_state_cleanup();
/*
* Fixme: we will run VM in COLO no matter its old running state.
* After exited COLO, we will keep running.
*/
old_vm_running = true;
}
if (old_vm_running && !entered_postcopy) {
vm_start();
} else {

View file

@ -43,6 +43,7 @@
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
@ -1871,16 +1872,8 @@ err:
return ret;
}
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
* start to become numerous it will be necessary to reduce the
* granularity of these critical sections.
*/
static int ram_save_setup(QEMUFile *f, void *opaque)
static int ram_save_init_globals(void)
{
RAMBlock *block;
int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
dirty_rate_high_cnt = 0;
@ -1947,6 +1940,29 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap_sync();
qemu_mutex_unlock_ramlist();
qemu_mutex_unlock_iothread();
rcu_read_unlock();
return 0;
}
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
* start to become numerous it will be necessary to reduce the
* granularity of these critical sections.
*/
static int ram_save_setup(QEMUFile *f, void *opaque)
{
RAMBlock *block;
/* migration has already setup the bitmap, reuse it. */
if (!migration_in_colo_state()) {
if (ram_save_init_globals() < 0) {
return -1;
}
}
rcu_read_lock();
qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
@ -2048,7 +2064,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
while (true) {
int pages;
pages = ram_find_and_save_block(f, true, &bytes_transferred);
pages = ram_find_and_save_block(f, !migration_in_colo_state(),
&bytes_transferred);
/* no more blocks to sent */
if (pages == 0) {
break;

View file

@ -207,3 +207,9 @@ migration_tls_outgoing_handshake_complete(void) ""
migration_tls_incoming_handshake_start(void) ""
migration_tls_incoming_handshake_error(const char *err) "err=%s"
migration_tls_incoming_handshake_complete(void) ""
# migration/colo.c
colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"

View file

@ -175,12 +175,15 @@
# @watchdog: the watchdog action is configured to pause and has been triggered
#
# @guest-panicked: guest has been panicked as a result of guest OS panic
#
# @colo: guest is paused to save/restore VM state under colo checkpoint (since
# 2.8)
##
{ 'enum': 'RunState',
'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
'guest-panicked' ] }
'guest-panicked', 'colo' ] }
##
# @StatusInfo:
@ -459,12 +462,14 @@
#
# @failed: some error occurred during migration process.
#
# @colo: VM is in the process of fault tolerance. (since 2.8)
#
# Since: 2.3
#
##
{ 'enum': 'MigrationStatus',
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
'active', 'postcopy-active', 'completed', 'failed' ] }
'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
##
# @MigrationInfo
@ -574,11 +579,16 @@
# been migrated, pulling the remaining pages along as needed. NOTE: If
# the migration fails during postcopy the VM will fail. (since 2.6)
#
# @x-colo: If enabled, migration will never end, and the state of the VM on the
# primary side will be migrated continuously to the VM on secondary
# side, this process is called COarse-Grain LOck Stepping (COLO) for
# Non-stop Service. (since 2.8)
#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram'] }
'compress', 'events', 'postcopy-ram', 'x-colo'] }
##
# @MigrationCapabilityStatus
@ -664,19 +674,24 @@
# @downtime-limit: set maximum tolerated downtime for migration. maximum
# downtime in milliseconds (Since 2.8)
#
# @x-checkpoint-delay: The delay time (in ms) between two COLO checkpoints in
# periodic mode. (Since 2.8)
#
# Since: 2.4
##
{ 'enum': 'MigrationParameter',
'data': ['compress-level', 'compress-threads', 'decompress-threads',
'cpu-throttle-initial', 'cpu-throttle-increment',
'tls-creds', 'tls-hostname', 'max-bandwidth',
'downtime-limit'] }
'downtime-limit', 'x-checkpoint-delay' ] }
#
# @migrate-set-parameters
#
# Set various migration parameters. See MigrationParameters for details.
#
# @x-checkpoint-delay: the delay time between two checkpoints. (Since 2.8)
#
# Since: 2.4
##
{ 'command': 'migrate-set-parameters', 'boxed': true,
@ -725,6 +740,8 @@
# @downtime-limit: set maximum tolerated downtime for migration. maximum
# downtime in milliseconds (Since 2.8)
#
# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
#
# Since: 2.4
##
{ 'struct': 'MigrationParameters',
@ -736,7 +753,8 @@
'*tls-creds': 'str',
'*tls-hostname': 'str',
'*max-bandwidth': 'int',
'*downtime-limit': 'int'} }
'*downtime-limit': 'int',
'*x-checkpoint-delay': 'int'} }
##
# @query-migrate-parameters
@ -779,6 +797,78 @@
# Since: 2.5
{ 'command': 'migrate-start-postcopy' }
##
# @COLOMessage
#
# The message transmission between Primary side and Secondary side.
#
# @checkpoint-ready: Secondary VM (SVM) is ready for checkpointing
#
# @checkpoint-request: Primary VM (PVM) tells SVM to prepare for checkpointing
#
# @checkpoint-reply: SVM gets PVM's checkpoint request
#
# @vmstate-send: VM's state will be sent by PVM.
#
# @vmstate-size: The total size of VMstate.
#
# @vmstate-received: VM's state has been received by SVM.
#
# @vmstate-loaded: VM's state has been loaded by SVM.
#
# Since: 2.8
##
{ 'enum': 'COLOMessage',
'data': [ 'checkpoint-ready', 'checkpoint-request', 'checkpoint-reply',
'vmstate-send', 'vmstate-size', 'vmstate-received',
'vmstate-loaded' ] }
##
# @COLOMode
#
# The colo mode
#
# @unknown: unknown mode
#
# @primary: master side
#
# @secondary: slave side
#
# Since: 2.8
##
{ 'enum': 'COLOMode',
'data': [ 'unknown', 'primary', 'secondary'] }
##
# @FailoverStatus
#
# An enumeration of COLO failover status
#
# @none: no failover has ever happened
#
# @require: got failover requirement but not handled
#
# @active: in the process of doing failover
#
# @completed: finish the process of failover
#
# Since: 2.8
##
{ 'enum': 'FailoverStatus',
'data': [ 'none', 'require', 'active', 'completed'] }
##
# @x-colo-lost-heartbeat
#
# Tell qemu that heartbeat is lost, request it to do takeover procedures.
# If this command is sent to the PVM, the Primary side will exit COLO mode.
# If sent to the Secondary, the Secondary side will run failover work,
# then takes over server operation to become the service VM.
#
# Since: 2.8
##
{ 'command': 'x-colo-lost-heartbeat' }
##
# @MouseInfo:
#

View file

@ -49,3 +49,4 @@ stub-obj-y += iohandler.o
stub-obj-y += smbios_type_38.o
stub-obj-y += ipmi.o
stub-obj-y += pc_madt_cpu_entry.o
stub-obj-y += migration-colo.o

46
stubs/migration-colo.c Normal file
View file

@ -0,0 +1,46 @@
/*
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
* (a.k.a. Fault Tolerance or Continuous Replication)
*
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
* Copyright (c) 2016 FUJITSU LIMITED
* Copyright (c) 2016 Intel Corporation
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "migration/colo.h"
#include "qmp-commands.h"
bool colo_supported(void)
{
return false;
}
bool migration_in_colo_state(void)
{
return false;
}
bool migration_incoming_in_colo_state(void)
{
return false;
}
void migrate_start_colo_process(MigrationState *s)
{
}
void *colo_process_incoming_thread(void *opaque)
{
return NULL;
}
void qmp_x_colo_lost_heartbeat(Error **errp)
{
error_setg(errp, "COLO is not supported, please rerun configure"
" with --enable-colo option in order to support"
" COLO feature");
}

11
vl.c
View file

@ -90,6 +90,7 @@ int main(int argc, char **argv)
#include "audio/audio.h"
#include "migration/migration.h"
#include "sysemu/cpus.h"
#include "migration/colo.h"
#include "sysemu/kvm.h"
#include "qapi/qmp/qjson.h"
#include "qemu/option.h"
@ -612,6 +613,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_COLO },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
@ -624,6 +626,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_PAUSED, RUN_STATE_RUNNING },
{ RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_COLO},
{ RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
@ -636,10 +639,13 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO},
{ RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
{ RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
@ -650,6 +656,7 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RUNNING, RUN_STATE_COLO},
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
@ -662,10 +669,12 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SUSPENDED, RUN_STATE_COLO},
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_WATCHDOG, RUN_STATE_COLO},
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
@ -4420,6 +4429,8 @@ int main(int argc, char **argv, char **envp)
#endif
}
colo_info_init();
if (net_init_clients() < 0) {
exit(1);
}