Merge remote-tracking branch 'luiz/queue/qmp' into staging

# By Laszlo Ersek
# Via Luiz Capitulino
* luiz/queue/qmp:
  dump: rebase from host-private RAMBlock offsets to guest-physical addresses
  dump: populate guest_phys_blocks
  dump: introduce GuestPhysBlockList
  dump: clamp guest-provided mapping lengths to ramblock sizes

Message-id: 1375974809-1757-1-git-send-email-lcapitulino@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
commit 4a9a8876a1
Anthony Liguori, 2013-08-12 08:30:49 -05:00
7 changed files with 258 additions and 100 deletions
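The series replaces iteration over host-side ram_list offsets with a guest-physical snapshot. As a rough orientation aid, and not part of the patches themselves, the sketch below walks the new GuestPhysBlockList the way the dump code does; total_guest_ram is an invented helper name, while the types and the QTAILQ iteration follow the declarations added in the memory_mapping hunks further down.

    #include "sysemu/memory_mapping.h"

    /* illustration only: sum up the guest-visible RAM captured in a snapshot */
    static hwaddr total_guest_ram(const GuestPhysBlockList *list)
    {
        GuestPhysBlock *block;
        hwaddr total = 0;

        QTAILQ_FOREACH(block, &list->head, next) {
            /* target_end is exclusive, so the difference is the block size */
            total += block->target_end - block->target_start;
        }
        return total;
    }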

dump.c

@@ -59,6 +59,7 @@ static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
}
typedef struct DumpState {
GuestPhysBlockList guest_phys_blocks;
ArchDumpInfo dump_info;
MemoryMappingList list;
uint16_t phdr_num;
@@ -69,7 +70,7 @@ typedef struct DumpState {
hwaddr memory_offset;
int fd;
RAMBlock *block;
GuestPhysBlock *next_block;
ram_addr_t start;
bool has_filter;
int64_t begin;
@@ -81,6 +82,7 @@ static int dump_cleanup(DumpState *s)
{
int ret = 0;
guest_phys_blocks_free(&s->guest_phys_blocks);
memory_mapping_list_free(&s->list);
if (s->fd != -1) {
close(s->fd);
@@ -187,7 +189,8 @@ static int write_elf32_header(DumpState *s)
}
static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
int phdr_index, hwaddr offset)
int phdr_index, hwaddr offset,
hwaddr filesz)
{
Elf64_Phdr phdr;
int ret;
@@ -197,15 +200,12 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
phdr.p_offset = cpu_convert_to_target64(offset, endian);
phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
if (offset == -1) {
/* When the memory is not stored into vmcore, offset will be -1 */
phdr.p_filesz = 0;
} else {
phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
}
phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
assert(memory_mapping->length >= filesz);
ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
if (ret < 0) {
dump_error(s, "dump: failed to write program header table.\n");
@@ -216,7 +216,8 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
}
static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
int phdr_index, hwaddr offset)
int phdr_index, hwaddr offset,
hwaddr filesz)
{
Elf32_Phdr phdr;
int ret;
@@ -226,15 +227,12 @@ static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
phdr.p_offset = cpu_convert_to_target32(offset, endian);
phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
if (offset == -1) {
/* When the memory is not stored into vmcore, offset will be -1 */
phdr.p_filesz = 0;
} else {
phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
}
phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
assert(memory_mapping->length >= filesz);
ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
if (ret < 0) {
dump_error(s, "dump: failed to write program header table.\n");
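Both load-header writers above now take an explicit filesz instead of inferring it from offset == -1. This follows the ELF split between p_filesz and p_memsz: only p_filesz bytes of a PT_LOAD segment are read from the vmcore, and whoever consumes the dump zero-fills the remaining p_memsz - p_filesz bytes. A small illustration with invented numbers (endianness conversion omitted for brevity):

    Elf64_Phdr phdr = {
        .p_type   = PT_LOAD,
        .p_offset = 0x2000,     /* where the bytes sit inside the vmcore    */
        .p_paddr  = 0x100000,   /* guest-physical address of the mapping    */
        .p_vaddr  = 0x100000,
        .p_filesz = 0x3000,     /* bytes actually stored in the file        */
        .p_memsz  = 0x5000,     /* full mapping length; the tail reads as 0 */
    };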
@@ -393,14 +391,14 @@ static int write_data(DumpState *s, void *buf, int length)
}
/* write the memroy to vmcore. 1 page per I/O. */
static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
int64_t size)
{
int64_t i;
int ret;
for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
TARGET_PAGE_SIZE);
if (ret < 0) {
return ret;
@@ -408,7 +406,7 @@ static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
}
if ((size % TARGET_PAGE_SIZE) != 0) {
ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
size % TARGET_PAGE_SIZE);
if (ret < 0) {
return ret;
@@ -418,57 +416,71 @@ static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
return 0;
}
/* get the memory's offset in the vmcore */
static hwaddr get_offset(hwaddr phys_addr,
DumpState *s)
/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
ram_addr_t mapping_length,
DumpState *s,
hwaddr *p_offset,
hwaddr *p_filesz)
{
RAMBlock *block;
GuestPhysBlock *block;
hwaddr offset = s->memory_offset;
int64_t size_in_block, start;
/* When the memory is not stored into vmcore, offset will be -1 */
*p_offset = -1;
*p_filesz = 0;
if (s->has_filter) {
if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
return -1;
return;
}
}
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
if (s->has_filter) {
if (block->offset >= s->begin + s->length ||
block->offset + block->length <= s->begin) {
if (block->target_start >= s->begin + s->length ||
block->target_end <= s->begin) {
/* This block is out of the range */
continue;
}
if (s->begin <= block->offset) {
start = block->offset;
if (s->begin <= block->target_start) {
start = block->target_start;
} else {
start = s->begin;
}
size_in_block = block->length - (start - block->offset);
if (s->begin + s->length < block->offset + block->length) {
size_in_block -= block->offset + block->length -
(s->begin + s->length);
size_in_block = block->target_end - start;
if (s->begin + s->length < block->target_end) {
size_in_block -= block->target_end - (s->begin + s->length);
}
} else {
start = block->offset;
size_in_block = block->length;
start = block->target_start;
size_in_block = block->target_end - block->target_start;
}
if (phys_addr >= start && phys_addr < start + size_in_block) {
return phys_addr - start + offset;
*p_offset = phys_addr - start + offset;
/* The offset range mapped from the vmcore file must not spill over
* the GuestPhysBlock, clamp it. The rest of the mapping will be
* zero-filled in memory at load time; see
* <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
*/
*p_filesz = phys_addr + mapping_length <= start + size_in_block ?
mapping_length :
size_in_block - (phys_addr - start);
return;
}
offset += size_in_block;
}
return -1;
}
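To make the clamp concrete, here is a worked example with invented numbers that reuses the expression above: a GuestPhysBlock covering [0x100000, 0x180000) and a mapping of length 0x40000 starting at guest-physical address 0x150000.

    hwaddr start              = 0x100000;
    int64_t size_in_block     = 0x80000;    /* 0x180000 - 0x100000 */
    hwaddr phys_addr          = 0x150000;
    ram_addr_t mapping_length = 0x40000;    /* would end at 0x190000 */

    /* 0x190000 spills past the block end at 0x180000, so the file-backed
     * part is clamped to 0x30000; the remaining 0x10000 bytes are covered
     * by the zero-fill rule mentioned in the comment above. */
    hwaddr p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                      mapping_length :
                      size_in_block - (phys_addr - start);   /* = 0x30000 */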
static int write_elf_loads(DumpState *s)
{
hwaddr offset;
hwaddr offset, filesz;
MemoryMapping *memory_mapping;
uint32_t phdr_index = 1;
int ret;
@@ -481,11 +493,15 @@ static int write_elf_loads(DumpState *s)
}
QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
offset = get_offset(memory_mapping->phys_addr, s);
get_offset_range(memory_mapping->phys_addr,
memory_mapping->length,
s, &offset, &filesz);
if (s->dump_info.d_class == ELFCLASS64) {
ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
filesz);
} else {
ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
filesz);
}
if (ret < 0) {
@@ -596,7 +612,7 @@ static int dump_completed(DumpState *s)
return 0;
}
static int get_next_block(DumpState *s, RAMBlock *block)
static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
while (1) {
block = QTAILQ_NEXT(block, next);
@@ -606,16 +622,16 @@ static int get_next_block(DumpState *s, RAMBlock *block)
}
s->start = 0;
s->block = block;
s->next_block = block;
if (s->has_filter) {
if (block->offset >= s->begin + s->length ||
block->offset + block->length <= s->begin) {
if (block->target_start >= s->begin + s->length ||
block->target_end <= s->begin) {
/* This block is out of the range */
continue;
}
if (s->begin > block->offset) {
s->start = s->begin - block->offset;
if (s->begin > block->target_start) {
s->start = s->begin - block->target_start;
}
}
@@ -626,18 +642,18 @@ static int get_next_block(DumpState *s, RAMBlock *block)
/* write all memory to vmcore */
static int dump_iterate(DumpState *s)
{
RAMBlock *block;
GuestPhysBlock *block;
int64_t size;
int ret;
while (1) {
block = s->block;
block = s->next_block;
size = block->length;
size = block->target_end - block->target_start;
if (s->has_filter) {
size -= s->start;
if (s->begin + s->length < block->offset + block->length) {
size -= block->offset + block->length - (s->begin + s->length);
if (s->begin + s->length < block->target_end) {
size -= block->target_end - (s->begin + s->length);
}
}
ret = write_memory(s, block, s->start, size);
@@ -672,23 +688,23 @@ static int create_vmcore(DumpState *s)
static ram_addr_t get_start_block(DumpState *s)
{
RAMBlock *block;
GuestPhysBlock *block;
if (!s->has_filter) {
s->block = QTAILQ_FIRST(&ram_list.blocks);
s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
return 0;
}
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (block->offset >= s->begin + s->length ||
block->offset + block->length <= s->begin) {
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
if (block->target_start >= s->begin + s->length ||
block->target_end <= s->begin) {
/* This block is out of the range */
continue;
}
s->block = block;
if (s->begin > block->offset) {
s->start = s->begin - block->offset;
s->next_block = block;
if (s->begin > block->target_start) {
s->start = s->begin - block->target_start;
} else {
s->start = 0;
}
@@ -713,24 +729,8 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
s->resume = false;
}
s->errp = errp;
s->fd = fd;
s->has_filter = has_filter;
s->begin = begin;
s->length = length;
s->start = get_start_block(s);
if (s->start == -1) {
error_set(errp, QERR_INVALID_PARAMETER, "begin");
goto cleanup;
}
/*
* get dump info: endian, class and architecture.
* If the target architecture is not supported, cpu_get_dump_info() will
* return -1.
*
* If we use KVM, we should synchronize the registers before we get dump
* info.
/* If we use KVM, we should synchronize the registers before we get dump
* info or physmap info.
*/
cpu_synchronize_all_states();
nr_cpus = 0;
@@ -738,7 +738,26 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
nr_cpus++;
}
ret = cpu_get_dump_info(&s->dump_info);
s->errp = errp;
s->fd = fd;
s->has_filter = has_filter;
s->begin = begin;
s->length = length;
guest_phys_blocks_init(&s->guest_phys_blocks);
guest_phys_blocks_append(&s->guest_phys_blocks);
s->start = get_start_block(s);
if (s->start == -1) {
error_set(errp, QERR_INVALID_PARAMETER, "begin");
goto cleanup;
}
/* get dump info: endian, class and architecture.
* If the target architecture is not supported, cpu_get_dump_info() will
* return -1.
*/
ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
if (ret < 0) {
error_set(errp, QERR_UNSUPPORTED);
goto cleanup;
@@ -754,13 +773,13 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
/* get memory mapping */
memory_mapping_list_init(&s->list);
if (paging) {
qemu_get_guest_memory_mapping(&s->list, &err);
qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
if (err != NULL) {
error_propagate(errp, err);
goto cleanup;
}
} else {
qemu_get_guest_simple_memory_mapping(&s->list);
qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
}
if (s->has_filter) {
@@ -812,6 +831,8 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
return 0;
cleanup:
guest_phys_blocks_free(&s->guest_phys_blocks);
if (s->resume) {
vm_start();
}
@@ -859,7 +880,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
return;
}
s = g_malloc(sizeof(DumpState));
s = g_malloc0(sizeof(DumpState));
ret = dump_init(s, fd, paging, has_begin, begin, length, errp);
if (ret < 0) {

@@ -20,7 +20,9 @@ typedef struct ArchDumpInfo {
int d_class; /* ELFCLASS32 or ELFCLASS64 */
} ArchDumpInfo;
int cpu_get_dump_info(ArchDumpInfo *info);
struct GuestPhysBlockList; /* memory_mapping.h */
int cpu_get_dump_info(ArchDumpInfo *info,
const struct GuestPhysBlockList *guest_phys_blocks);
ssize_t cpu_get_note_size(int class, int machine, int nr_cpus);
#endif

@@ -17,6 +17,25 @@
#include "qemu/queue.h"
#include "qemu/typedefs.h"
typedef struct GuestPhysBlock {
/* visible to guest, reflects PCI hole, etc */
hwaddr target_start;
/* implies size */
hwaddr target_end;
/* points into host memory */
uint8_t *host_addr;
QTAILQ_ENTRY(GuestPhysBlock) next;
} GuestPhysBlock;
/* point-in-time snapshot of guest-visible physical mappings */
typedef struct GuestPhysBlockList {
unsigned num;
QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head;
} GuestPhysBlockList;
/* The physical and virtual address in the memory mapping are contiguous. */
typedef struct MemoryMapping {
hwaddr phys_addr;
@@ -45,10 +64,17 @@ void memory_mapping_list_free(MemoryMappingList *list);
void memory_mapping_list_init(MemoryMappingList *list);
void qemu_get_guest_memory_mapping(MemoryMappingList *list, Error **errp);
void guest_phys_blocks_free(GuestPhysBlockList *list);
void guest_phys_blocks_init(GuestPhysBlockList *list);
void guest_phys_blocks_append(GuestPhysBlockList *list);
void qemu_get_guest_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks,
Error **errp);
/* get guest's memory mapping without do paging(virtual address is 0). */
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks);
void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
int64_t length);
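Taken together, the new declarations form a simple lifecycle that mirrors the call order used by dump_init in the dump.c hunks above; the sketch below is illustrative only, with error handling abbreviated.

    GuestPhysBlockList blocks;
    MemoryMappingList mappings;
    Error *err = NULL;

    guest_phys_blocks_init(&blocks);
    guest_phys_blocks_append(&blocks);    /* one-shot snapshot of guest RAM */

    memory_mapping_list_init(&mappings);
    qemu_get_guest_memory_mapping(&mappings, &blocks, &err);
    if (err != NULL) {
        /* report the error, then fall through to the cleanup below */
    }

    memory_mapping_list_free(&mappings);
    guest_phys_blocks_free(&blocks);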

@@ -11,9 +11,15 @@
*
*/
#include <glib.h>
#include "cpu.h"
#include "exec/cpu-all.h"
#include "sysemu/memory_mapping.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
//#define DEBUG_GUEST_PHYS_REGION_ADD
static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
MemoryMapping *mapping)
@@ -165,6 +171,101 @@ void memory_mapping_list_init(MemoryMappingList *list)
QTAILQ_INIT(&list->head);
}
void guest_phys_blocks_free(GuestPhysBlockList *list)
{
GuestPhysBlock *p, *q;
QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
QTAILQ_REMOVE(&list->head, p, next);
g_free(p);
}
list->num = 0;
}
void guest_phys_blocks_init(GuestPhysBlockList *list)
{
list->num = 0;
QTAILQ_INIT(&list->head);
}
typedef struct GuestPhysListener {
GuestPhysBlockList *list;
MemoryListener listener;
} GuestPhysListener;
static void guest_phys_blocks_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
GuestPhysListener *g;
uint64_t section_size;
hwaddr target_start, target_end;
uint8_t *host_addr;
GuestPhysBlock *predecessor;
/* we only care about RAM */
if (!memory_region_is_ram(section->mr)) {
return;
}
g = container_of(listener, GuestPhysListener, listener);
section_size = int128_get64(section->size);
target_start = section->offset_within_address_space;
target_end = target_start + section_size;
host_addr = memory_region_get_ram_ptr(section->mr) +
section->offset_within_region;
predecessor = NULL;
/* find continuity in guest physical address space */
if (!QTAILQ_EMPTY(&g->list->head)) {
hwaddr predecessor_size;
predecessor = QTAILQ_LAST(&g->list->head, GuestPhysBlockHead);
predecessor_size = predecessor->target_end - predecessor->target_start;
/* the memory API guarantees monotonically increasing traversal */
g_assert(predecessor->target_end <= target_start);
/* we want continuity in both guest-physical and host-virtual memory */
if (predecessor->target_end < target_start ||
predecessor->host_addr + predecessor_size != host_addr) {
predecessor = NULL;
}
}
if (predecessor == NULL) {
/* isolated mapping, allocate it and add it to the list */
GuestPhysBlock *block = g_malloc0(sizeof *block);
block->target_start = target_start;
block->target_end = target_end;
block->host_addr = host_addr;
QTAILQ_INSERT_TAIL(&g->list->head, block, next);
++g->list->num;
} else {
/* expand predecessor until @target_end; predecessor's start doesn't
* change
*/
predecessor->target_end = target_end;
}
#ifdef DEBUG_GUEST_PHYS_REGION_ADD
fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end="
TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start,
target_end, predecessor ? "joined" : "added", g->list->num);
#endif
}
void guest_phys_blocks_append(GuestPhysBlockList *list)
{
GuestPhysListener g = { 0 };
g.list = list;
g.listener.region_add = &guest_phys_blocks_region_add;
memory_listener_register(&g.listener, &address_space_memory);
memory_listener_unregister(&g.listener);
}
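guest_phys_blocks_append() relies on a property of the memory API: memory_listener_register() replays the region_add callback for every section of the current flat view, so registering and immediately unregistering a listener amounts to a one-shot snapshot. A minimal sketch of the same trick, here merely counting RAM sections (RamCounter and count_ram_sections are invented names, and the includes are the same ones added at the top of this file):

    typedef struct RamCounter {
        MemoryListener listener;
        unsigned ram_sections;
    } RamCounter;

    static void ram_counter_region_add(MemoryListener *listener,
                                       MemoryRegionSection *section)
    {
        RamCounter *rc = container_of(listener, RamCounter, listener);

        if (memory_region_is_ram(section->mr)) {
            ++rc->ram_sections;
        }
    }

    static unsigned count_ram_sections(void)
    {
        RamCounter rc = { .ram_sections = 0 };

        rc.listener.region_add = &ram_counter_region_add;
        /* registering replays region_add for everything currently mapped
         * into address_space_memory */
        memory_listener_register(&rc.listener, &address_space_memory);
        memory_listener_unregister(&rc.listener);
        return rc.ram_sections;
    }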
static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
{
CPUState *cpu;
@@ -178,10 +279,12 @@ static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
return NULL;
}
void qemu_get_guest_memory_mapping(MemoryMappingList *list, Error **errp)
void qemu_get_guest_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks,
Error **errp)
{
CPUState *cpu, *first_paging_enabled_cpu;
RAMBlock *block;
GuestPhysBlock *block;
ram_addr_t offset, length;
first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
@@ -201,19 +304,21 @@ void qemu_get_guest_memory_mapping(MemoryMappingList *list, Error **errp)
* If the guest doesn't use paging, the virtual address is equal to physical
* address.
*/
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
offset = block->offset;
length = block->length;
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
offset = block->target_start;
length = block->target_end - block->target_start;
create_new_memory_mapping(list, offset, offset, length);
}
}
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks)
{
RAMBlock *block;
GuestPhysBlock *block;
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
create_new_memory_mapping(list, block->offset, 0, block->length);
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
create_new_memory_mapping(list, block->target_start, 0,
block->target_end - block->target_start);
}
}

@@ -16,7 +16,8 @@
#include "qapi/qmp/qerror.h"
#include "qmp-commands.h"
int cpu_get_dump_info(ArchDumpInfo *info)
int cpu_get_dump_info(ArchDumpInfo *info,
const struct GuestPhysBlockList *guest_phys_blocks)
{
return -1;
}

@ -15,6 +15,7 @@
#include "exec/cpu-all.h"
#include "sysemu/dump.h"
#include "elf.h"
#include "sysemu/memory_mapping.h"
#ifdef TARGET_X86_64
typedef struct {
@@ -389,10 +390,11 @@ int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs,
return cpu_write_qemu_note(f, &cpu->env, opaque, 0);
}
int cpu_get_dump_info(ArchDumpInfo *info)
int cpu_get_dump_info(ArchDumpInfo *info,
const GuestPhysBlockList *guest_phys_blocks)
{
bool lma = false;
RAMBlock *block;
GuestPhysBlock *block;
#ifdef TARGET_X86_64
X86CPU *first_x86_cpu = X86_CPU(first_cpu);
@@ -412,8 +414,8 @@ int cpu_get_dump_info(ArchDumpInfo *info)
} else {
info->d_class = ELFCLASS32;
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (block->offset + block->length > UINT_MAX) {
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
if (block->target_end > UINT_MAX) {
/* The memory size is greater than 4G */
info->d_class = ELFCLASS64;
break;
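With this change the class selection is driven by the highest guest-physical address rather than by ram_addr_t offsets. For example (invented numbers), a block whose exclusive end sits at 6 GiB exceeds UINT_MAX, so the loop above switches the dump to ELFCLASS64:

    GuestPhysBlock high = {
        .target_start = 0x100000000ULL,   /* 4 GiB */
        .target_end   = 0x180000000ULL,   /* 6 GiB, exclusive end */
        .host_addr    = NULL,             /* irrelevant to the class check */
    };
    bool needs_elfclass64 = high.target_end > UINT_MAX;   /* true here */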

@@ -176,7 +176,8 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
return s390x_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque);
}
int cpu_get_dump_info(ArchDumpInfo *info)
int cpu_get_dump_info(ArchDumpInfo *info,
const struct GuestPhysBlockList *guest_phys_blocks)
{
info->d_machine = EM_S390;
info->d_endian = ELFDATA2MSB;