mirror of
https://gitlab.com/qemu-project/qemu
synced 2024-11-05 20:35:44 +00:00
195801d700
The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
923 lines
26 KiB
C
923 lines
26 KiB
C
/*
|
|
* Dirtyrate implement code
|
|
*
|
|
* Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
|
|
*
|
|
* Authors:
|
|
* Chuan Zheng <zhengchuan@huawei.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/error-report.h"
|
|
#include <zlib.h>
|
|
#include "hw/core/cpu.h"
|
|
#include "qapi/error.h"
|
|
#include "exec/ramblock.h"
|
|
#include "exec/target_page.h"
|
|
#include "qemu/rcu_queue.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qapi/qapi-commands-migration.h"
|
|
#include "ram.h"
|
|
#include "trace.h"
|
|
#include "dirtyrate.h"
|
|
#include "monitor/hmp.h"
|
|
#include "monitor/monitor.h"
|
|
#include "qapi/qmp/qdict.h"
|
|
#include "sysemu/kvm.h"
|
|
#include "sysemu/runstate.h"
|
|
#include "exec/memory.h"
|
|
#include "qemu/xxhash.h"
|
|
|
|
/*
 * total_dirty_pages is protected by the BQL and is used to count
 * dirty pages during the period between two calls to
 * memory_global_dirty_log_sync().
 */
uint64_t total_dirty_pages;
|
|
|
|
/*
 * Pair of dirty-page counter samples: one taken when a measurement
 * starts and one taken when it ends.
 */
typedef struct DirtyPageRecord {
    uint64_t start_pages;   /* counter value sampled at the start */
    uint64_t end_pages;     /* counter value sampled at the end */
} DirtyPageRecord;
|
|
|
|
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
|
|
static struct DirtyRateStat DirtyStat;
|
|
static DirtyRateMeasureMode dirtyrate_mode =
|
|
DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
|
|
|
|
static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
|
|
{
|
|
int64_t current_time;
|
|
|
|
current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
|
if ((current_time - initial_time) >= msec) {
|
|
msec = current_time - initial_time;
|
|
} else {
|
|
g_usleep((msec + initial_time - current_time) * 1000);
|
|
/* g_usleep may overshoot */
|
|
msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
|
|
}
|
|
|
|
return msec;
|
|
}
|
|
|
|
static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
|
|
CPUState *cpu, bool start)
|
|
{
|
|
if (start) {
|
|
dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
|
|
} else {
|
|
dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
|
|
}
|
|
}
|
|
|
|
/*
 * Convert the number of pages dirtied between the two samples in
 * @dirty_pages into a rate in MiB per second, given that the samples
 * were taken @calc_time_ms milliseconds apart.
 */
static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
                                      int64_t calc_time_ms)
{
    uint64_t delta_pages = dirty_pages.end_pages - dirty_pages.start_pages;

    /*
     * Scale by 1000ms/s _before_ converting down to megabytes so that
     * the conversion does not throw away precision.
     */
    uint64_t scaled_pages = delta_pages * 1000;

    return qemu_target_pages_to_MiB(scaled_pages) / calc_time_ms;
}
|
|
|
|
/*
 * Start (@start == true) or stop (@start == false) global dirty
 * logging for @flag while holding the BQL.
 */
void global_dirty_log_change(unsigned int flag, bool start)
{
    bql_lock();

    if (!start) {
        memory_global_dirty_log_stop(flag);
    } else {
        memory_global_dirty_log_start(flag);
    }

    bql_unlock();
}
|
|
|
|
/*
 * global_dirty_log_sync
 *
 * With the BQL held:
 * 1. sync the dirty log via memory_global_dirty_log_sync()
 * 2. if @one_shot is set, also stop dirty tracking for @flag.
 */
static void global_dirty_log_sync(unsigned int flag, bool one_shot)
{
    bql_lock();

    memory_global_dirty_log_sync(false);
    if (!one_shot) {
        bql_unlock();
        return;
    }

    memory_global_dirty_log_stop(flag);
    bql_unlock();
}
|
|
|
|
/*
 * Count the CPUs on the CPU list, record the count in stat->nvcpu and
 * give @stat a zeroed rates array with one entry per CPU.
 *
 * Returns a newly allocated, zeroed array of DirtyPageRecord with one
 * entry per CPU; the caller releases it with g_free().
 */
static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
{
    CPUState *cs;
    int count = 0;

    CPU_FOREACH(cs) {
        count += 1;
    }

    stat->rates = g_new0(DirtyRateVcpu, count);
    stat->nvcpu = count;

    return g_new0(DirtyPageRecord, count);
}
|
|
|
|
/*
 * Sample the dirty page counter of every CPU into @records, filling
 * the start value when @start is true and the end value otherwise.
 */
static void vcpu_dirty_stat_collect(DirtyPageRecord *records,
                                    bool start)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        record_dirtypages(records, cs, start);
    }
}
|
|
|
|
int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
|
|
VcpuStat *stat,
|
|
unsigned int flag,
|
|
bool one_shot)
|
|
{
|
|
DirtyPageRecord *records;
|
|
int64_t init_time_ms;
|
|
int64_t duration;
|
|
int64_t dirtyrate;
|
|
int i = 0;
|
|
unsigned int gen_id;
|
|
|
|
retry:
|
|
init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
|
|
|
WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
|
|
gen_id = cpu_list_generation_id_get();
|
|
records = vcpu_dirty_stat_alloc(stat);
|
|
vcpu_dirty_stat_collect(records, true);
|
|
}
|
|
|
|
duration = dirty_stat_wait(calc_time_ms, init_time_ms);
|
|
|
|
global_dirty_log_sync(flag, one_shot);
|
|
|
|
WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
|
|
if (gen_id != cpu_list_generation_id_get()) {
|
|
g_free(records);
|
|
g_free(stat->rates);
|
|
cpu_list_unlock();
|
|
goto retry;
|
|
}
|
|
vcpu_dirty_stat_collect(records, false);
|
|
}
|
|
|
|
for (i = 0; i < stat->nvcpu; i++) {
|
|
dirtyrate = do_calculate_dirtyrate(records[i], duration);
|
|
|
|
stat->rates[i].id = i;
|
|
stat->rates[i].dirty_rate = dirtyrate;
|
|
|
|
trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
|
|
}
|
|
|
|
g_free(records);
|
|
|
|
return duration;
|
|
}
|
|
|
|
static bool is_calc_time_valid(int64_t msec)
|
|
{
|
|
if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool is_sample_pages_valid(int64_t pages)
|
|
{
|
|
return pages >= MIN_SAMPLE_PAGE_COUNT &&
|
|
pages <= MAX_SAMPLE_PAGE_COUNT;
|
|
}
|
|
|
|
static int dirtyrate_set_state(int *state, int old_state, int new_state)
|
|
{
|
|
assert(new_state < DIRTY_RATE_STATUS__MAX);
|
|
trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
|
|
if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
|
|
return 0;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Decimal power of given time unit relative to one second */
|
|
static int time_unit_to_power(TimeUnit time_unit)
|
|
{
|
|
switch (time_unit) {
|
|
case TIME_UNIT_SECOND:
|
|
return 0;
|
|
case TIME_UNIT_MILLISECOND:
|
|
return -3;
|
|
default:
|
|
assert(false); /* unreachable */
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Convert @value from @unit_from to @unit_to.
 *
 * Converting to a coarser unit divides by ten repeatedly and therefore
 * truncates.  Converting to a finer unit multiplies by ten repeatedly;
 * if the result does not fit in int64_t it saturates to INT64_MAX or
 * INT64_MIN instead of overflowing, which would be undefined behaviour
 * for a signed type.
 */
static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
                                 TimeUnit unit_to)
{
    int power = time_unit_to_power(unit_from) -
                time_unit_to_power(unit_to);

    while (power < 0) {
        value /= 10;
        power += 1;
    }

    while (power > 0) {
        /* check before multiplying: signed overflow is undefined */
        if (value > INT64_MAX / 10) {
            return INT64_MAX;
        }
        if (value < INT64_MIN / 10) {
            return INT64_MIN;
        }
        value *= 10;
        power -= 1;
    }

    return value;
}
|
|
|
|
|
|
static struct DirtyRateInfo *
|
|
query_dirty_rate_info(TimeUnit calc_time_unit)
|
|
{
|
|
int i;
|
|
int64_t dirty_rate = DirtyStat.dirty_rate;
|
|
struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
|
|
DirtyRateVcpuList *head = NULL, **tail = &head;
|
|
|
|
info->status = CalculatingState;
|
|
info->start_time = DirtyStat.start_time;
|
|
info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
|
|
TIME_UNIT_MILLISECOND,
|
|
calc_time_unit);
|
|
info->calc_time_unit = calc_time_unit;
|
|
info->sample_pages = DirtyStat.sample_pages;
|
|
info->mode = dirtyrate_mode;
|
|
|
|
if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
|
|
info->has_dirty_rate = true;
|
|
info->dirty_rate = dirty_rate;
|
|
|
|
if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
|
|
/*
|
|
* set sample_pages with 0 to indicate page sampling
|
|
* isn't enabled
|
|
**/
|
|
info->sample_pages = 0;
|
|
info->has_vcpu_dirty_rate = true;
|
|
for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
|
|
DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
|
|
rate->id = DirtyStat.dirty_ring.rates[i].id;
|
|
rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
|
|
QAPI_LIST_APPEND(tail, rate);
|
|
}
|
|
info->vcpu_dirty_rate = head;
|
|
}
|
|
|
|
if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
|
|
info->sample_pages = 0;
|
|
}
|
|
}
|
|
|
|
trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
|
|
|
|
return info;
|
|
}
|
|
|
|
static void init_dirtyrate_stat(struct DirtyRateConfig config)
|
|
{
|
|
DirtyStat.dirty_rate = -1;
|
|
DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
|
|
DirtyStat.calc_time_ms = config.calc_time_ms;
|
|
DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
|
|
|
|
switch (config.mode) {
|
|
case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
|
|
DirtyStat.page_sampling.total_dirty_samples = 0;
|
|
DirtyStat.page_sampling.total_sample_count = 0;
|
|
DirtyStat.page_sampling.total_block_mem_MB = 0;
|
|
break;
|
|
case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
|
|
DirtyStat.dirty_ring.nvcpu = -1;
|
|
DirtyStat.dirty_ring.rates = NULL;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
|
|
{
|
|
/* last calc-dirty-rate qmp use dirty ring mode */
|
|
if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
|
|
free(DirtyStat.dirty_ring.rates);
|
|
DirtyStat.dirty_ring.rates = NULL;
|
|
}
|
|
}
|
|
|
|
static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
|
|
{
|
|
DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
|
|
DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
|
|
/* size of total pages in MB */
|
|
DirtyStat.page_sampling.total_block_mem_MB +=
|
|
qemu_target_pages_to_MiB(info->ramblock_pages);
|
|
}
|
|
|
|
static void update_dirtyrate(uint64_t msec)
|
|
{
|
|
uint64_t dirtyrate;
|
|
uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
|
|
uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
|
|
uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
|
|
|
|
dirtyrate = total_dirty_samples * total_block_mem_MB *
|
|
1000 / (total_sample_count * msec);
|
|
|
|
DirtyStat.dirty_rate = dirtyrate;
|
|
}
|
|
|
|
/*
|
|
* Compute hash of a single page of size TARGET_PAGE_SIZE.
|
|
*/
|
|
static uint32_t compute_page_hash(void *ptr)
|
|
{
|
|
size_t page_size = qemu_target_page_size();
|
|
uint32_t i;
|
|
uint64_t v1, v2, v3, v4;
|
|
uint64_t res;
|
|
const uint64_t *p = ptr;
|
|
|
|
v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
|
|
v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
|
|
v3 = QEMU_XXHASH_SEED + 0;
|
|
v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
|
|
for (i = 0; i < page_size / 8; i += 4) {
|
|
v1 = XXH64_round(v1, p[i + 0]);
|
|
v2 = XXH64_round(v2, p[i + 1]);
|
|
v3 = XXH64_round(v3, p[i + 2]);
|
|
v4 = XXH64_round(v4, p[i + 3]);
|
|
}
|
|
res = XXH64_mergerounds(v1, v2, v3, v4);
|
|
res += page_size;
|
|
res = XXH64_avalanche(res);
|
|
return (uint32_t)(res & UINT32_MAX);
|
|
}
|
|
|
|
|
|
/*
|
|
* get hash result for the sampled memory with length of TARGET_PAGE_SIZE
|
|
* in ramblock, which starts from ramblock base address.
|
|
*/
|
|
static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
|
|
uint64_t vfn)
|
|
{
|
|
uint32_t hash;
|
|
|
|
hash = compute_page_hash(info->ramblock_addr +
|
|
vfn * qemu_target_page_size());
|
|
|
|
trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
|
|
return hash;
|
|
}
|
|
|
|
static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
|
|
{
|
|
unsigned int sample_pages_count;
|
|
int i;
|
|
GRand *rand;
|
|
|
|
sample_pages_count = info->sample_pages_count;
|
|
|
|
/* ramblock size less than one page, return success to skip this ramblock */
|
|
if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
|
|
return true;
|
|
}
|
|
|
|
info->hash_result = g_try_malloc0_n(sample_pages_count,
|
|
sizeof(uint32_t));
|
|
if (!info->hash_result) {
|
|
return false;
|
|
}
|
|
|
|
info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
|
|
sizeof(uint64_t));
|
|
if (!info->sample_page_vfn) {
|
|
g_free(info->hash_result);
|
|
return false;
|
|
}
|
|
|
|
rand = g_rand_new();
|
|
for (i = 0; i < sample_pages_count; i++) {
|
|
info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
|
|
info->ramblock_pages - 1);
|
|
info->hash_result[i] = get_ramblock_vfn_hash(info,
|
|
info->sample_page_vfn[i]);
|
|
}
|
|
g_rand_free(rand);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Fill @info with the properties of @block needed for page sampling:
 * its id string, host address, size in target pages and the number of
 * pages to sample (derived from config->sample_pages_per_gigabytes).
 */
static void get_ramblock_dirty_info(RAMBlock *block,
                                    struct RamblockDirtyInfo *info,
                                    struct DirtyRateConfig *config)
{
    uint64_t per_gigabyte = config->sample_pages_per_gigabytes;

    /* NOTE(review): assumes info->idstr can hold the block id string */
    strcpy(info->idstr, qemu_ram_get_idstr(block));
    info->ramblock_addr = qemu_ram_get_host_addr(block);

    /* Right shift TARGET_PAGE_BITS to calc page count */
    info->ramblock_pages = qemu_ram_get_used_length(block) >>
                           qemu_target_page_bits();

    /* Right shift 30 bits to calc ramblock size in GB */
    info->sample_pages_count = (qemu_ram_get_used_length(block) *
                                per_gigabyte) >> 30;
}
|
|
|
|
static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
|
|
{
|
|
int i;
|
|
|
|
if (!infos) {
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < count; i++) {
|
|
g_free(infos[i].sample_page_vfn);
|
|
g_free(infos[i].hash_result);
|
|
}
|
|
g_free(infos);
|
|
}
|
|
|
|
/*
 * Return true (and emit a trace event) when @block's used length is
 * below MIN_RAMBLOCK_SIZE << 10, i.e. the block is too small to be
 * sampled; return false otherwise.
 */
static bool skip_sample_ramblock(RAMBlock *block)
{
    bool too_small =
        qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10);

    if (too_small) {
        trace_skip_sample_ramblock(block->idstr,
                                   qemu_ram_get_used_length(block));
    }

    return too_small;
}
|
|
|
|
static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
|
|
struct DirtyRateConfig config,
|
|
int *block_count)
|
|
{
|
|
struct RamblockDirtyInfo *info = NULL;
|
|
struct RamblockDirtyInfo *dinfo = NULL;
|
|
RAMBlock *block = NULL;
|
|
int total_count = 0;
|
|
int index = 0;
|
|
bool ret = false;
|
|
|
|
RAMBLOCK_FOREACH_MIGRATABLE(block) {
|
|
if (skip_sample_ramblock(block)) {
|
|
continue;
|
|
}
|
|
total_count++;
|
|
}
|
|
|
|
dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
|
|
if (dinfo == NULL) {
|
|
goto out;
|
|
}
|
|
|
|
RAMBLOCK_FOREACH_MIGRATABLE(block) {
|
|
if (skip_sample_ramblock(block)) {
|
|
continue;
|
|
}
|
|
if (index >= total_count) {
|
|
break;
|
|
}
|
|
info = &dinfo[index];
|
|
get_ramblock_dirty_info(block, info, &config);
|
|
if (!save_ramblock_hash(info)) {
|
|
goto out;
|
|
}
|
|
index++;
|
|
}
|
|
ret = true;
|
|
|
|
out:
|
|
*block_count = index;
|
|
*block_dinfo = dinfo;
|
|
return ret;
|
|
}
|
|
|
|
static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
|
|
{
|
|
uint32_t hash;
|
|
int i;
|
|
|
|
for (i = 0; i < info->sample_pages_count; i++) {
|
|
hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
|
|
if (hash != info->hash_result[i]) {
|
|
trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
|
|
info->sample_dirty_count++;
|
|
}
|
|
}
|
|
}
|
|
|
|
static struct RamblockDirtyInfo *
|
|
find_block_matched(RAMBlock *block, int count,
|
|
struct RamblockDirtyInfo *infos)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < count; i++) {
|
|
if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (i == count) {
|
|
return NULL;
|
|
}
|
|
|
|
if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
|
|
infos[i].ramblock_pages !=
|
|
(qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
|
|
trace_find_page_matched(block->idstr);
|
|
return NULL;
|
|
}
|
|
|
|
return &infos[i];
|
|
}
|
|
|
|
static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
|
|
int block_count)
|
|
{
|
|
struct RamblockDirtyInfo *block_dinfo = NULL;
|
|
RAMBlock *block = NULL;
|
|
|
|
RAMBLOCK_FOREACH_MIGRATABLE(block) {
|
|
if (skip_sample_ramblock(block)) {
|
|
continue;
|
|
}
|
|
block_dinfo = find_block_matched(block, block_count, info);
|
|
if (block_dinfo == NULL) {
|
|
continue;
|
|
}
|
|
calc_page_dirty_rate(block_dinfo);
|
|
update_dirtyrate_stat(block_dinfo);
|
|
}
|
|
|
|
if (DirtyStat.page_sampling.total_sample_count == 0) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Store the current value of total_dirty_pages in @dirty_pages: as the
 * start sample when @start is true, as the end sample otherwise.
 */
static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
                                            bool start)
{
    if (!start) {
        dirty_pages->end_pages = total_dirty_pages;
        return;
    }

    dirty_pages->start_pages = total_dirty_pages;
}
|
|
|
|
static inline void dirtyrate_manual_reset_protect(void)
|
|
{
|
|
RAMBlock *block = NULL;
|
|
|
|
WITH_RCU_READ_LOCK_GUARD() {
|
|
RAMBLOCK_FOREACH_MIGRATABLE(block) {
|
|
memory_region_clear_dirty_bitmap(block->mr, 0,
|
|
block->used_length);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
|
|
{
|
|
int64_t start_time;
|
|
DirtyPageRecord dirty_pages;
|
|
|
|
bql_lock();
|
|
memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
|
|
|
|
/*
|
|
* 1'round of log sync may return all 1 bits with
|
|
* KVM_DIRTY_LOG_INITIALLY_SET enable
|
|
* skip it unconditionally and start dirty tracking
|
|
* from 2'round of log sync
|
|
*/
|
|
memory_global_dirty_log_sync(false);
|
|
|
|
/*
|
|
* reset page protect manually and unconditionally.
|
|
* this make sure kvm dirty log be cleared if
|
|
* KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
|
|
*/
|
|
dirtyrate_manual_reset_protect();
|
|
bql_unlock();
|
|
|
|
record_dirtypages_bitmap(&dirty_pages, true);
|
|
|
|
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
|
DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
|
|
|
|
DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);
|
|
|
|
/*
|
|
* do two things.
|
|
* 1. fetch dirty bitmap from kvm
|
|
* 2. stop dirty tracking
|
|
*/
|
|
global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
|
|
|
|
record_dirtypages_bitmap(&dirty_pages, false);
|
|
|
|
DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
|
|
DirtyStat.calc_time_ms);
|
|
}
|
|
|
|
static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
|
|
{
|
|
uint64_t dirtyrate = 0;
|
|
uint64_t dirtyrate_sum = 0;
|
|
int i = 0;
|
|
|
|
/* start log sync */
|
|
global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
|
|
|
|
DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
|
|
|
|
/* calculate vcpu dirtyrate */
|
|
DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
|
|
&DirtyStat.dirty_ring,
|
|
GLOBAL_DIRTY_DIRTY_RATE,
|
|
true);
|
|
|
|
/* calculate vm dirtyrate */
|
|
for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
|
|
dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
|
|
DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
|
|
dirtyrate_sum += dirtyrate;
|
|
}
|
|
|
|
DirtyStat.dirty_rate = dirtyrate_sum;
|
|
}
|
|
|
|
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
|
|
{
|
|
struct RamblockDirtyInfo *block_dinfo = NULL;
|
|
int block_count = 0;
|
|
int64_t initial_time;
|
|
|
|
rcu_read_lock();
|
|
initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
|
DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
|
|
if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
|
|
goto out;
|
|
}
|
|
rcu_read_unlock();
|
|
|
|
DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
|
|
initial_time);
|
|
|
|
rcu_read_lock();
|
|
if (!compare_page_hash_info(block_dinfo, block_count)) {
|
|
goto out;
|
|
}
|
|
|
|
update_dirtyrate(DirtyStat.calc_time_ms);
|
|
|
|
out:
|
|
rcu_read_unlock();
|
|
free_ramblock_dirty_info(block_dinfo, block_count);
|
|
}
|
|
|
|
static void calculate_dirtyrate(struct DirtyRateConfig config)
|
|
{
|
|
if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
|
|
calculate_dirtyrate_dirty_bitmap(config);
|
|
} else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
|
|
calculate_dirtyrate_dirty_ring(config);
|
|
} else {
|
|
calculate_dirtyrate_sample_vm(config);
|
|
}
|
|
|
|
trace_dirtyrate_calculate(DirtyStat.dirty_rate);
|
|
}
|
|
|
|
void *get_dirtyrate_thread(void *arg)
|
|
{
|
|
struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
|
|
int ret;
|
|
rcu_register_thread();
|
|
|
|
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
|
|
DIRTY_RATE_STATUS_MEASURING);
|
|
if (ret == -1) {
|
|
error_report("change dirtyrate state failed.");
|
|
return NULL;
|
|
}
|
|
|
|
calculate_dirtyrate(config);
|
|
|
|
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
|
|
DIRTY_RATE_STATUS_MEASURED);
|
|
if (ret == -1) {
|
|
error_report("change dirtyrate state failed.");
|
|
}
|
|
|
|
rcu_unregister_thread();
|
|
return NULL;
|
|
}
|
|
|
|
void qmp_calc_dirty_rate(int64_t calc_time,
|
|
bool has_calc_time_unit,
|
|
TimeUnit calc_time_unit,
|
|
bool has_sample_pages,
|
|
int64_t sample_pages,
|
|
bool has_mode,
|
|
DirtyRateMeasureMode mode,
|
|
Error **errp)
|
|
{
|
|
static struct DirtyRateConfig config;
|
|
QemuThread thread;
|
|
int ret;
|
|
|
|
/*
|
|
* If the dirty rate is already being measured, don't attempt to start.
|
|
*/
|
|
if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
|
|
error_setg(errp, "the dirty rate is already being measured.");
|
|
return;
|
|
}
|
|
|
|
int64_t calc_time_ms = convert_time_unit(
|
|
calc_time,
|
|
has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
|
|
TIME_UNIT_MILLISECOND
|
|
);
|
|
|
|
if (!is_calc_time_valid(calc_time_ms)) {
|
|
error_setg(errp, "Calculation time is out of range [%dms, %dms].",
|
|
MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
|
|
return;
|
|
}
|
|
|
|
if (!has_mode) {
|
|
mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
|
|
}
|
|
|
|
if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
|
|
error_setg(errp, "sample-pages is used only in page-sampling mode");
|
|
return;
|
|
}
|
|
|
|
if (has_sample_pages) {
|
|
if (!is_sample_pages_valid(sample_pages)) {
|
|
error_setg(errp, "sample-pages is out of range[%d, %d].",
|
|
MIN_SAMPLE_PAGE_COUNT,
|
|
MAX_SAMPLE_PAGE_COUNT);
|
|
return;
|
|
}
|
|
} else {
|
|
sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
|
|
}
|
|
|
|
/*
|
|
* dirty ring mode only works when kvm dirty ring is enabled.
|
|
* on the contrary, dirty bitmap mode is not.
|
|
*/
|
|
if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
|
|
!kvm_dirty_ring_enabled()) ||
|
|
((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
|
|
kvm_dirty_ring_enabled())) {
|
|
error_setg(errp, "mode %s is not enabled, use other method instead.",
|
|
DirtyRateMeasureMode_str(mode));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Init calculation state as unstarted.
|
|
*/
|
|
ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
|
|
DIRTY_RATE_STATUS_UNSTARTED);
|
|
if (ret == -1) {
|
|
error_setg(errp, "init dirty rate calculation state failed.");
|
|
return;
|
|
}
|
|
|
|
config.calc_time_ms = calc_time_ms;
|
|
config.sample_pages_per_gigabytes = sample_pages;
|
|
config.mode = mode;
|
|
|
|
cleanup_dirtyrate_stat(config);
|
|
|
|
/*
|
|
* update dirty rate mode so that we can figure out what mode has
|
|
* been used in last calculation
|
|
**/
|
|
dirtyrate_mode = mode;
|
|
|
|
init_dirtyrate_stat(config);
|
|
|
|
qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
|
|
(void *)&config, QEMU_THREAD_DETACHED);
|
|
}
|
|
|
|
|
|
struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
|
|
TimeUnit calc_time_unit,
|
|
Error **errp)
|
|
{
|
|
return query_dirty_rate_info(
|
|
has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
|
|
}
|
|
|
|
/*
 * HMP handler that prints the state and results of the dirty rate
 * measurement to @mon.  @qdict is not used.
 *
 * Start time and period are both reported in seconds.
 */
void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
{
    DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);

    monitor_printf(mon, "Status: %s\n",
                   DirtyRateStatus_str(info->status));
    /* start_time is stored in seconds (host clock in ms / 1000) */
    monitor_printf(mon, "Start Time: %"PRIi64" (sec)\n",
                   info->start_time);
    if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
        monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
                       info->sample_pages);
    }
    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                   info->calc_time);
    monitor_printf(mon, "Mode: %s\n",
                   DirtyRateMeasureMode_str(info->mode));
    monitor_printf(mon, "Dirty rate: ");
    if (info->has_dirty_rate) {
        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
        if (info->has_vcpu_dirty_rate) {
            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
            for (rate = head; rate != NULL; rate = rate->next) {
                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
                               " (MB/s)\n", rate->value->id,
                               rate->value->dirty_rate);
            }
        }
    } else {
        monitor_printf(mon, "(not ready)\n");
    }

    qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
    g_free(info);
}
|
|
|
|
/*
 * HMP handler that starts a dirty rate measurement.
 *
 * Reads "second", "sample_pages_per_GB", "dirty_ring" and
 * "dirty_bitmap" from @qdict, rejects a zero period or both mode flags
 * at once, and hands the request to qmp_calc_dirty_rate().
 */
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
{
    int64_t sec = qdict_get_try_int(qdict, "second", 0);
    int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
    bool has_sample_pages = (sample_pages != -1);
    bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
    bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
    DirtyRateMeasureMode mode;
    Error *err = NULL;

    if (sec == 0) {
        monitor_printf(mon, "Incorrect period length specified!\n");
        return;
    }

    if (dirty_ring && dirty_bitmap) {
        monitor_printf(mon, "Either dirty ring or dirty bitmap "
                       "can be specified!\n");
        return;
    }

    if (dirty_bitmap) {
        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
    } else if (dirty_ring) {
        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
    } else {
        mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
    }

    qmp_calc_dirty_rate(sec, /* calc-time */
                        false, TIME_UNIT_SECOND, /* calc-time-unit */
                        has_sample_pages, sample_pages,
                        true, mode,
                        &err);
    if (err != NULL) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
                   " seconds\n", sec);
    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
}
|