qemu/trace-events
Stefan Hajnoczi 4a1cba3802 aio: add polling mode to AioContext
The AioContext event loop uses ppoll(2) or epoll_wait(2) to monitor file
descriptors or until a timer expires.  In cases like virtqueues, Linux
AIO, and ThreadPool it is technically possible to wait for events via
polling (i.e. continuously checking for events without blocking).

Polling can be faster than blocking syscalls because file descriptors,
the process scheduler, and system calls are bypassed.

The main disadvantage to polling is that it increases CPU utilization.
In classic polling configuration a full host CPU thread might run at
100% to respond to events as quickly as possible.  This patch implements
a timeout so we fall back to blocking syscalls if polling detects no
activity.  After the timeout no CPU cycles are wasted on polling until
the next event loop iteration.

The run_poll_handlers_begin() and run_poll_handlers_end() trace events
are added to aid performance analysis and troubleshooting.  If you need
to know whether polling mode is being used, trace these events to find
out.

Note that the AioContext is now re-acquired before disabling notify_me
in the non-polling case.  This makes the code cleaner since notify_me
was enabled outside the non-polling AioContext release region.  This
change is correct since it's safe to keep notify_me enabled longer
(disabling is an optimization) but potentially causes unnecessary
event_notifer_set() calls.  I think the chance of performance regression
is small here.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20161201192652.9509-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2017-01-03 16:38:48 +00:00

185 lines
9.5 KiB
Plaintext

# Trace events for debugging and performance instrumentation
#
# This file is processed by the tracetool script during the build.
#
# To add a new trace event:
#
# 1. Choose a name for the trace event. Declare its arguments and format
# string.
#
# 2. Call the trace event from code using trace_##name, e.g. multiwrite_cb() ->
# trace_multiwrite_cb(). The source file must #include "trace.h".
#
# Format of a trace event:
#
# [disable] <name>(<type1> <arg1>[, <type2> <arg2>] ...) "<format-string>"
#
# Example: g_malloc(size_t size) "size %zu"
#
# The "disable" keyword will build without the trace event.
#
# The <name> must be a valid as a C function name.
#
# Types should be standard C types. Use void * for pointers because the trace
# system may not have the necessary headers included.
#
# The <format-string> should be a sprintf()-compatible format string.
# aio-posix.c
run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
# thread-pool.c
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
# ioport.c
cpu_in(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
# balloon.c
# Since requests are raised via monitor, not many tracepoints are needed.
balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
# vl.c
vm_state_notify(int running, int reason) "running %d reason %d"
load_file(const char *name, const char *path) "name %s location %s"
runstate_set(int new_state) "new state %d"
system_wakeup_request(int reason) "reason=%d"
qemu_system_shutdown_request(void) ""
qemu_system_powerdown_request(void) ""
# spice-qemu-char.c
spice_vmc_write(ssize_t out, int len) "spice wrottn %zd of requested %d"
spice_vmc_read(int bytes, int len) "spice read %d of requested %d"
spice_vmc_register_interface(void *scd) "spice vmc registered interface %p"
spice_vmc_unregister_interface(void *scd) "spice vmc unregistered interface %p"
spice_vmc_event(int event) "spice vmc event %d"
# xen-hvm.c
xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx"
xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i"
xen_default_ioreq_server(void) ""
xen_ioreq_server_create(uint32_t id) "id: %u"
xen_ioreq_server_destroy(uint32_t id) "id: %u"
xen_ioreq_server_state(uint32_t id, bool enable) "id: %u: enable: %i"
xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=%#"PRIx64" port=%#"PRIx64" size=%d"
cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=%#"PRIx64" port=%#"PRIx64" size=%d"
cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
# xen-mapcache.c
xen_map_cache(uint64_t phys_addr) "want %#"PRIx64
xen_remap_bucket(uint64_t index) "index %#"PRIx64
xen_map_cache_return(void* ptr) "%p"
# monitor.c
handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\""
monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p"
monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p"
monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRId64
monitor_protocol_event_throttle(uint32_t event, uint64_t rate) "event=%d rate=%" PRId64
# dma-helpers.c
dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
dma_aio_cancel(void *dbs) "dbs=%p"
dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
dma_map_wait(void *dbs) "dbs=%p"
# kvm-all.c
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
kvm_irqchip_commit_routes(void) ""
kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d"
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
# TCG related tracing (mostly disabled by default)
# cpu-exec.c
disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
# translate-all.c
translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
# memory.c
memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
### Guest events, keep at bottom
## vCPU
# Hot-plug a new virtual (guest) CPU
#
# Mode: user, softmmu
# Targets: all
vcpu guest_cpu_enter(void)
# Reset the state of a virtual (guest) CPU
#
# Mode: user, softmmu
# Targets: all
vcpu guest_cpu_reset(void)
# @vaddr: Access' virtual address.
# @info : Access' information (see below).
#
# Start virtual memory access (before any potential access violation).
#
# Does not include memory accesses performed by devices.
#
# Access information can be parsed as:
#
# struct mem_info {
# uint8_t size_shift : 2; /* interpreted as "1 << size_shift" bytes */
# bool sign_extend: 1; /* sign-extended */
# uint8_t endianness : 1; /* 0: little, 1: big */
# bool store : 1; /* wheter it's a store operation */
# };
#
# Mode: user, softmmu
# Targets: TCG(all)
disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
# @num: System call number.
# @arg*: System call argument value.
#
# Start executing a guest system call in syscall emulation mode.
#
# Mode: user
# Targets: TCG(all)
disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
# @num: System call number.
# @ret: System call result value.
#
# Finish executing a guest system call in syscall emulation mode.
#
# Mode: user
# Targets: TCG(all)
disable vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64