From d01c05c955f7736952c13d85edaae0b262b7a0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Tue, 4 Jul 2017 10:34:19 +0200 Subject: [PATCH 1/7] trace: Allocate cpu->trace_dstate in place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's little point in dynamically allocating the bitmap if we know at compile-time the max number of events we want to support. Thus, make room in the struct for the bitmap, which will make things easier later: this paves the way for upcoming changes, in which we'll use a u32 to fully capture cpu->trace_dstate. This change also increases performance by saving a dereference and improving locality--note that this is important since upcoming work makes reading this bitmap fairly common. Signed-off-by: Emilio G. Cota Reviewed-by: Lluís Vilanova Signed-off-by: Lluís Vilanova Message-id: 149915725977.6295.15069969323605305641.stgit@frigg.lan Signed-off-by: Stefan Hajnoczi --- include/qom/cpu.h | 9 +++------ qom/cpu.c | 8 -------- trace/control.c | 9 ++++++++- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 04c31e63eb..5f188d4c28 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -259,6 +259,7 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data); struct qemu_work_item; #define CPU_UNSET_NUMA_NODE_ID -1 +#define CPU_TRACE_DSTATE_MAX_EVENTS 32 /** * CPUState: @@ -370,12 +371,8 @@ struct CPUState { struct KVMState *kvm_state; struct kvm_run *kvm_run; - /* - * Used for events with 'vcpu' and *without* the 'disabled' properties. - * Dynamically allocated based on bitmap requried to hold up to - * trace_get_vcpu_event_count() entries. - */ - unsigned long *trace_dstate; + /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); /* TODO Move common fields from CPUArchState here. 
*/ int cpu_index; /* used by alpha TCG */ diff --git a/qom/cpu.c b/qom/cpu.c index a39ff6c19c..4f38db0dac 100644 --- a/qom/cpu.c +++ b/qom/cpu.c @@ -380,7 +380,6 @@ static void cpu_common_unrealizefn(DeviceState *dev, Error **errp) static void cpu_common_initfn(Object *obj) { - uint32_t count; CPUState *cpu = CPU(obj); CPUClass *cc = CPU_GET_CLASS(obj); @@ -395,18 +394,11 @@ static void cpu_common_initfn(Object *obj) QTAILQ_INIT(&cpu->breakpoints); QTAILQ_INIT(&cpu->watchpoints); - count = trace_get_vcpu_event_count(); - if (count) { - cpu->trace_dstate = bitmap_new(count); - } - cpu_exec_initfn(cpu); } static void cpu_common_finalize(Object *obj) { - CPUState *cpu = CPU(obj); - g_free(cpu->trace_dstate); } static int64_t cpu_common_get_arch_id(CPUState *cpu) diff --git a/trace/control.c b/trace/control.c index f5fb11d280..82d8989c4d 100644 --- a/trace/control.c +++ b/trace/control.c @@ -65,8 +65,15 @@ void trace_event_register_group(TraceEvent **events) size_t i; for (i = 0; events[i] != NULL; i++) { events[i]->id = next_id++; - if (events[i]->vcpu_id != TRACE_VCPU_EVENT_NONE) { + if (events[i]->vcpu_id == TRACE_VCPU_EVENT_NONE) { + continue; + } + + if (likely(next_vcpu_id < CPU_TRACE_DSTATE_MAX_EVENTS)) { events[i]->vcpu_id = next_vcpu_id++; + } else { + error_report("WARNING: too many vcpu trace events; dropping '%s'", + events[i]->name); } } event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1); From d43811165df75571055dab7b602526a40404a63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Tue, 4 Jul 2017 10:38:26 +0200 Subject: [PATCH 2/7] trace: [tcg] Delay changes to dynamic state when translating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This keeps consistency across all decisions taken during translation when the dynamic state of a vCPU is changed in the middle of translating some guest code. 
Signed-off-by: Lluís Vilanova Reviewed-by: Richard Henderson Reviewed-by: Emilio G. Cota Signed-off-by: Emilio G. Cota Message-id: 149915750615.6295.3713699402253529487.stgit@frigg.lan Signed-off-by: Stefan Hajnoczi --- include/qom/cpu.h | 3 +++ trace/control-target.c | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 5f188d4c28..25eefea7ab 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -302,6 +302,8 @@ struct qemu_work_item; * @kvm_fd: vCPU file descriptor for KVM. * @work_mutex: Lock to prevent multiple access to queued_work_*. * @queued_work_first: First asynchronous work pending. + * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes + * to @trace_dstate). * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask). * * State of one CPU core or thread. @@ -372,6 +374,7 @@ struct CPUState { struct kvm_run *kvm_run; /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); /* TODO Move common fields from CPUArchState here. 
*/ diff --git a/trace/control-target.c b/trace/control-target.c index 99a8ed5157..31c2ed11a0 100644 --- a/trace/control-target.c +++ b/trace/control-target.c @@ -61,6 +61,13 @@ void trace_event_set_state_dynamic(TraceEvent *ev, bool state) } } +static void trace_event_synchronize_vcpu_state_dynamic( + CPUState *vcpu, run_on_cpu_data ignored) +{ + bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed, + CPU_TRACE_DSTATE_MAX_EVENTS); +} + void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev, bool state) { @@ -73,13 +80,20 @@ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, if (state_pre != state) { if (state) { trace_events_enabled_count++; - set_bit(vcpu_id, vcpu->trace_dstate); + set_bit(vcpu_id, vcpu->trace_dstate_delayed); (*ev->dstate)++; } else { trace_events_enabled_count--; - clear_bit(vcpu_id, vcpu->trace_dstate); + clear_bit(vcpu_id, vcpu->trace_dstate_delayed); (*ev->dstate)--; } + /* + * Delay changes until next TB; we want all TBs to be built from a + * single set of dstate values to ensure consistency of generated + * tracing code. + */ + async_run_on_cpu(vcpu, trace_event_synchronize_vcpu_state_dynamic, + RUN_ON_CPU_NULL); } } From 61a67f71dd7e437212a2730c4462dff9b3b7b3da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Tue, 4 Jul 2017 10:42:32 +0200 Subject: [PATCH 3/7] exec: [tcg] Use different TBs according to the vCPU's dynamic tracing state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every vCPU now uses a separate set of TBs for each set of dynamic tracing event state values. Each set of TBs can be used by any number of vCPUs to maximize TB reuse when vCPUs have the same tracing state. This feature is later used by tracetool to optimize tracing of guest code events. The maximum number of TB sets is defined as 2^E, where E is the number of events that have the 'vcpu' property (their state is stored in CPUState->trace_dstate). 
For this to work, a change on the dynamic tracing state of a vCPU will force it to flush its virtual TB cache (which is only indexed by address), and fall back to the physical TB cache (which now contains the vCPU's dynamic tracing state as part of the hashing function). Signed-off-by: Lluís Vilanova Reviewed-by: Richard Henderson Reviewed-by: Emilio G. Cota Signed-off-by: Emilio G. Cota Message-id: 149915775266.6295.10060144081246467690.stgit@frigg.lan Signed-off-by: Stefan Hajnoczi --- accel/tcg/cpu-exec.c | 8 ++++++-- accel/tcg/translate-all.c | 11 +++++++++-- include/exec/exec-all.h | 3 +++ include/exec/tb-hash-xx.h | 7 +++++-- include/exec/tb-hash.h | 5 +++-- tcg/tcg-runtime.c | 3 ++- tests/qht-bench.c | 2 +- trace/control-target.c | 1 + trace/control.h | 3 +++ 9 files changed, 33 insertions(+), 10 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 3581618bc0..d84b01d1b8 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -280,6 +280,7 @@ struct tb_desc { CPUArchState *env; tb_page_addr_t phys_page1; uint32_t flags; + uint32_t trace_vcpu_dstate; }; static bool tb_cmp(const void *p, const void *d) @@ -291,6 +292,7 @@ static bool tb_cmp(const void *p, const void *d) tb->page_addr[0] == desc->phys_page1 && tb->cs_base == desc->cs_base && tb->flags == desc->flags && + tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && !atomic_read(&tb->invalid)) { /* check next page if needed */ if (tb->page_addr[1] == -1) { @@ -319,10 +321,11 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, desc.env = (CPUArchState *)cpu->env_ptr; desc.cs_base = cs_base; desc.flags = flags; + desc.trace_vcpu_dstate = *cpu->trace_dstate; desc.pc = pc; phys_pc = get_page_addr_code(desc.env, pc); desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_hash_func(phys_pc, pc, flags); + h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate); return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h); } @@ -342,7 +345,8 @@ static inline 
TranslationBlock *tb_find(CPUState *cpu, cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || - tb->flags != flags)) { + tb->flags != flags || + tb->trace_vcpu_dstate != *cpu->trace_dstate)) { tb = tb_htable_lookup(cpu, pc, cs_base, flags); if (!tb) { diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 4e1831cbb9..090ebad0a7 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -54,6 +54,7 @@ #include "exec/tb-hash.h" #include "translate-all.h" #include "qemu/bitmap.h" +#include "qemu/error-report.h" #include "qemu/timer.h" #include "qemu/main-loop.h" #include "exec/log.h" @@ -112,6 +113,11 @@ typedef struct PageDesc { #define V_L2_BITS 10 #define V_L2_SIZE (1 << V_L2_BITS) +/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ +QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > + sizeof(((TranslationBlock *)0)->trace_vcpu_dstate) + * BITS_PER_BYTE); + /* * L1 Mapping properties */ @@ -1071,7 +1077,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags); + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate); qht_remove(&tcg_ctx.tb_ctx.htable, tb, h); /* remove the TB from the page list */ @@ -1216,7 +1222,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags); + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate); qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); #ifdef DEBUG_TB_CHECK @@ -1262,6 +1268,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; + tb->trace_vcpu_dstate = *cpu->trace_dstate; tb->invalid = false; #ifdef 
CONFIG_PROFILER diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index bf8da2aa5a..c09b365716 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -330,6 +330,9 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x20000 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ + /* Per-vCPU dynamic tracing state used to generate this TB */ + uint32_t trace_vcpu_dstate; + uint16_t invalid; void *tc_ptr; /* pointer to the translated code */ diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h index 2c40b5c466..6cd3022c07 100644 --- a/include/exec/tb-hash-xx.h +++ b/include/exec/tb-hash-xx.h @@ -49,7 +49,7 @@ * contiguous in memory. */ static inline -uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e) +uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f) { uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2; uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2; @@ -78,11 +78,14 @@ uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e) v4 *= PRIME32_1; h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18); - h32 += 20; + h32 += 24; h32 += e * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4; + h32 += f * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index b1fe2d0161..17b5ee0edf 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -58,9 +58,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) #endif /* CONFIG_SOFTMMU */ static inline -uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags) +uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, + uint32_t trace_vcpu_dstate) { - return tb_hash_func5(phys_pc, pc, flags); + return tb_hash_func6(phys_pc, pc, flags, trace_vcpu_dstate); } #endif diff --git a/tcg/tcg-runtime.c b/tcg/tcg-runtime.c index ec3a34e461..3e23649dd7 100644 --- 
a/tcg/tcg-runtime.c +++ b/tcg/tcg-runtime.c @@ -158,7 +158,8 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env, target_ulong addr) if (unlikely(!(tb && tb->pc == addr && tb->cs_base == cs_base - && tb->flags == flags))) { + && tb->flags == flags + && tb->trace_vcpu_dstate == *cpu->trace_dstate))) { tb = tb_htable_lookup(cpu, addr, cs_base, flags); if (!tb) { return tcg_ctx.code_gen_epilogue; diff --git a/tests/qht-bench.c b/tests/qht-bench.c index 2afa09d859..11c1cec766 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -103,7 +103,7 @@ static bool is_equal(const void *obj, const void *userp) static inline uint32_t h(unsigned long v) { - return tb_hash_func5(v, 0, 0); + return tb_hash_func6(v, 0, 0, 0); } /* diff --git a/trace/control-target.c b/trace/control-target.c index 31c2ed11a0..4e36101997 100644 --- a/trace/control-target.c +++ b/trace/control-target.c @@ -66,6 +66,7 @@ static void trace_event_synchronize_vcpu_state_dynamic( { bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); + cpu_tb_jmp_cache_clear(vcpu); } void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, diff --git a/trace/control.h b/trace/control.h index 4ea53e2986..b931824d60 100644 --- a/trace/control.h +++ b/trace/control.h @@ -165,6 +165,9 @@ void trace_event_set_state_dynamic(TraceEvent *ev, bool state); * Set the dynamic tracing state of an event for the given vCPU. * * Pre-condition: trace_event_get_vcpu_state_static(ev) == true + * + * Note: Changes for execution-time events with the 'tcg' property will not be + * propagated until the next TB is executed (iff executing in TCG mode). 
*/ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev, bool state); From 864a2178d4c014a217cacf76e42b818fe9feb1d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Tue, 4 Jul 2017 10:46:39 +0200 Subject: [PATCH 4/7] trace: [tcg] Do not generate TCG code to trace dynamically-disabled events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an event is dynamically disabled, the TCG code that calls the execution-time tracer is not generated. Removes the overheads of execution-time tracers for dynamically disabled events. As a bonus, also avoids checking the event state when the execution-time tracer is called from TCG-generated code (since otherwise TCG would simply not call it). Signed-off-by: Lluís Vilanova Signed-off-by: Emilio G. Cota Message-id: 149915799921.6295.13067154430923434035.stgit@frigg.lan Signed-off-by: Stefan Hajnoczi --- scripts/tracetool/__init__.py | 3 ++- scripts/tracetool/format/h.py | 26 +++++++++++++++++------- scripts/tracetool/format/tcg_h.py | 21 +++++++++++++++---- scripts/tracetool/format/tcg_helper_c.py | 5 +++-- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py index 1ffbc1dc40..d4c204a472 100644 --- a/scripts/tracetool/__init__.py +++ b/scripts/tracetool/__init__.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -268,6 +268,7 @@ def formats(self): return self._FMT.findall(self.fmt) QEMU_TRACE = "trace_%(name)s" + QEMU_TRACE_NOCHECK = "_nocheck__" + QEMU_TRACE QEMU_TRACE_TCG = QEMU_TRACE + "_tcg" QEMU_DSTATE = "_TRACE_%(NAME)s_DSTATE" QEMU_EVENT = "_TRACE_%(NAME)s_EVENT" diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py index 
3682f4e6a8..aecf249d66 100644 --- a/scripts/tracetool/format/h.py +++ b/scripts/tracetool/format/h.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -49,6 +49,19 @@ def generate(events, backend, group): backend.generate_begin(events, group) for e in events: + # tracer without checks + out('', + 'static inline void %(api)s(%(args)s)', + '{', + api=e.api(e.QEMU_TRACE_NOCHECK), + args=e.args) + + if "disable" not in e.properties: + backend.generate(e, group) + + out('}') + + # tracer wrapper with checks (per-vCPU tracing) if "vcpu" in e.properties: trace_cpu = next(iter(e.args))[1] cond = "trace_event_get_vcpu_state(%(cpu)s,"\ @@ -63,16 +76,15 @@ def generate(events, backend, group): 'static inline void %(api)s(%(args)s)', '{', ' if (%(cond)s) {', + ' %(api_nocheck)s(%(names)s);', + ' }', + '}', api=e.api(), + api_nocheck=e.api(e.QEMU_TRACE_NOCHECK), args=e.args, + names=", ".join(e.args.names()), cond=cond) - if "disable" not in e.properties: - backend.generate(e, group) - - out(' }', - '}') - backend.generate_end(events, group) out('#endif /* TRACE_%s_GENERATED_TRACERS_H */' % group.upper()) diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py index db55f52eb5..1651cc3f71 100644 --- a/scripts/tracetool/format/tcg_h.py +++ b/scripts/tracetool/format/tcg_h.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -46,7 +46,7 @@ def generate(events, backend, group): for e in events: # just keep one of them - if "tcg-trans" not in e.properties: + if "tcg-exec" not in e.properties: continue out('static inline void 
%(name_tcg)s(%(args)s)', @@ -58,12 +58,25 @@ def generate(events, backend, group): args_trans = e.original.event_trans.args args_exec = tracetool.vcpu.transform_args( "tcg_helper_c", e.original.event_exec, "wrapper") + if "vcpu" in e.properties: + trace_cpu = e.args.names()[0] + cond = "trace_event_get_vcpu_state(%(cpu)s,"\ + " TRACE_%(id)s)"\ + % dict( + cpu=trace_cpu, + id=e.original.event_exec.name.upper()) + else: + cond = "true" + out(' %(name_trans)s(%(argnames_trans)s);', - ' gen_helper_%(name_exec)s(%(argnames_exec)s);', + ' if (%(cond)s) {', + ' gen_helper_%(name_exec)s(%(argnames_exec)s);', + ' }', name_trans=e.original.event_trans.api(e.QEMU_TRACE), name_exec=e.original.event_exec.api(e.QEMU_TRACE), argnames_trans=", ".join(args_trans.names()), - argnames_exec=", ".join(args_exec.names())) + argnames_exec=", ".join(args_exec.names()), + cond=cond) out('}') diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py index ec7acbe347..bbbd6ad0f4 100644 --- a/scripts/tracetool/format/tcg_helper_c.py +++ b/scripts/tracetool/format/tcg_helper_c.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -71,10 +71,11 @@ def generate(events, backend, group): out('void %(name_tcg)s(%(args_api)s)', '{', + # NOTE: the check was already performed at TCG-generation time ' %(name)s(%(args_call)s);', '}', name_tcg="helper_%s_proxy" % e.api(), - name=e.api(), + name=e.api(e.QEMU_TRACE_NOCHECK), args_api=e_args_api, args_call=", ".join(e_args_call.casted()), ) From 1ff7b531963a71f10880d119db8fbcc591a2652a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Tue, 4 Jul 2017 10:50:46 +0200 Subject: [PATCH 5/7] trace: [tcg, trivial] Re-align generated code MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Last patch removed a nesting level in generated code. Re-align all code generated by backends to be 4-column aligned. Signed-off-by: Lluís Vilanova Signed-off-by: Emilio G. Cota Message-id: 149915824586.6295.17820926011082409033.stgit@frigg.lan Signed-off-by: Stefan Hajnoczi --- scripts/tracetool/backend/dtrace.py | 4 ++-- scripts/tracetool/backend/ftrace.py | 20 ++++++++++---------- scripts/tracetool/backend/log.py | 19 ++++++++++--------- scripts/tracetool/backend/simple.py | 4 ++-- scripts/tracetool/backend/syslog.py | 6 +++--- scripts/tracetool/backend/ust.py | 4 ++-- 6 files changed, 29 insertions(+), 28 deletions(-) diff --git a/scripts/tracetool/backend/dtrace.py b/scripts/tracetool/backend/dtrace.py index c469cbd1a3..c6812b70a2 100644 --- a/scripts/tracetool/backend/dtrace.py +++ b/scripts/tracetool/backend/dtrace.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -46,6 +46,6 @@ def generate_h_begin(events, group): def generate_h(event, group): - out(' QEMU_%(uppername)s(%(argnames)s);', + out(' QEMU_%(uppername)s(%(argnames)s);', uppername=event.name.upper(), argnames=", ".join(event.args.names())) diff --git a/scripts/tracetool/backend/ftrace.py b/scripts/tracetool/backend/ftrace.py index db9fe7ad57..dd0eda4441 100644 --- a/scripts/tracetool/backend/ftrace.py +++ b/scripts/tracetool/backend/ftrace.py @@ -29,17 +29,17 @@ def generate_h(event, group): if len(event.args) > 0: argnames = ", " + argnames - out(' {', - ' char ftrace_buf[MAX_TRACE_STRLEN];', - ' int unused __attribute__ ((unused));', - ' int trlen;', - ' if (trace_event_get_state(%(event_id)s)) {', - ' trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,', - ' "%(name)s " %(fmt)s "\\n" %(argnames)s);', - ' trlen = MIN(trlen, 
MAX_TRACE_STRLEN - 1);', - ' unused = write(trace_marker_fd, ftrace_buf, trlen);', - ' }', + out(' {', + ' char ftrace_buf[MAX_TRACE_STRLEN];', + ' int unused __attribute__ ((unused));', + ' int trlen;', + ' if (trace_event_get_state(%(event_id)s)) {', + ' trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,', + ' "%(name)s " %(fmt)s "\\n" %(argnames)s);', + ' trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);', + ' unused = write(trace_marker_fd, ftrace_buf, trlen);', ' }', + ' }', name=event.name, args=event.args, event_id="TRACE_" + event.name.upper(), diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py index 4f4a4d38b1..54f0a69886 100644 --- a/scripts/tracetool/backend/log.py +++ b/scripts/tracetool/backend/log.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -35,14 +35,15 @@ def generate_h(event, group): else: cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper()) - out(' if (%(cond)s) {', - ' struct timeval _now;', - ' gettimeofday(&_now, NULL);', - ' qemu_log_mask(LOG_TRACE, "%%d@%%zd.%%06zd:%(name)s " %(fmt)s "\\n",', - ' getpid(),', - ' (size_t)_now.tv_sec, (size_t)_now.tv_usec', - ' %(argnames)s);', - ' }', + out(' if (%(cond)s) {', + ' struct timeval _now;', + ' gettimeofday(&_now, NULL);', + ' qemu_log_mask(LOG_TRACE,', + ' "%%d@%%zd.%%06zd:%(name)s " %(fmt)s "\\n",', + ' getpid(),', + ' (size_t)_now.tv_sec, (size_t)_now.tv_usec', + ' %(argnames)s);', + ' }', cond=cond, name=event.name, fmt=event.fmt.rstrip("\n"), diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index 4acc06e81c..f983670ee1 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2014, 
Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -37,7 +37,7 @@ def generate_h_begin(events, group): def generate_h(event, group): - out(' _simple_%(api)s(%(args)s);', + out(' _simple_%(api)s(%(args)s);', api=event.api(), args=", ".join(event.args.names())) diff --git a/scripts/tracetool/backend/syslog.py b/scripts/tracetool/backend/syslog.py index b8ff2790c4..1ce627f0fc 100644 --- a/scripts/tracetool/backend/syslog.py +++ b/scripts/tracetool/backend/syslog.py @@ -35,9 +35,9 @@ def generate_h(event, group): else: cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper()) - out(' if (%(cond)s) {', - ' syslog(LOG_INFO, "%(name)s " %(fmt)s %(argnames)s);', - ' }', + out(' if (%(cond)s) {', + ' syslog(LOG_INFO, "%(name)s " %(fmt)s %(argnames)s);', + ' }', cond=cond, name=event.name, fmt=event.fmt.rstrip("\n"), diff --git a/scripts/tracetool/backend/ust.py b/scripts/tracetool/backend/ust.py index 52ce892478..2adaf548d5 100644 --- a/scripts/tracetool/backend/ust.py +++ b/scripts/tracetool/backend/ust.py @@ -6,7 +6,7 @@ """ __author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " +__copyright__ = "Copyright 2012-2017, Lluís Vilanova " __license__ = "GPL version 2 or (at your option) any later version" __maintainer__ = "Stefan Hajnoczi" @@ -35,6 +35,6 @@ def generate_h(event, group): if len(event.args) > 0: argnames = ", " + argnames - out(' tracepoint(qemu, %(name)s%(tp_args)s);', + out(' tracepoint(qemu, %(name)s%(tp_args)s);', name=event.name, tp_args=argnames) From 5caa262fdadae08f7c5af7f1328d424823811aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Tue, 4 Jul 2017 10:54:52 +0200 Subject: [PATCH 6/7] trace: [trivial] Statically enable all guest events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing optimizations make it
feasible to have them available on all builds. Some quick'n'dirty numbers with 400.perlbench (SPECcpu2006) on the train input (medium size - suns.pl) and the guest_mem_before event: * vanilla, statically disabled real 0m2,259s user 0m2,252s sys 0m0,004s * vanilla, statically enabled (overhead: 2.18x) real 0m4,921s user 0m4,912s sys 0m0,008s * multi-tb, statically disabled (overhead: 0.99x) [within noise range] real 0m2,228s user 0m2,216s sys 0m0,008s * multi-tb, statically enabled (overhead: 0.99x) [within noise range] real 0m2,229s user 0m2,224s sys 0m0,004s Now enabling all events when booting an ARM system that immediately shuts down (https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg04085.html): * vanilla, statically disabled real 0m32,153s user 0m31,276s sys 0m0,108s * vanilla, statically enabled (overhead: 1.35x) real 0m43,507s user 0m42,680s sys 0m0,168s * multi-tb, statically disabled (overhead: 1.03x) real 0m32,993s user 0m32,516s sys 0m0,104s * multi-tb, statically enabled (overhead: 1.00x) [within noise range] real 0m32,110s user 0m31,176s sys 0m0,156s And finally enabling all events using Emilio's dbt-bench (where orig == vanilla, new == multi-tb): NBench score; higher is better 180 +-+--------+----------+----------+---------+----------+----------+----------+----------+----------+---------+----------+--------+-+ | | | *** $$$$%% orig | 160 +-+....................................*.*.$..$.%............................................................orig-enabled +-+ | * * $ $ % new | 140 +-+....................................*.*.$..$.%............................................................new-disabled.......+-+ | * * $ $ % | | * * $ $ % | 120 +-+....................................*.*.$..$.%...............................................................................+-+ | * * $ $ % | | * * $ $ % | 100 +-+....................................*.*.$..$.%.....$$$%%%....................................................................+-+ | * * $ $ 
% *** $ $ % *** $$$%% | 80 +-+....................................*.*.$..$.%.*.*.$.$..%.*.*.$.$.%..........................................................+-+ | * * $ $ % * * $ $ % * * $ $ % | | * * $ $ % * * $ $ % * * $ $ % | 60 +-+.........................***..$$$%%.*.*##..$.%.*.*.$.$..%.*.*.$.$.%..***.$$$%%...............................................+-+ | **** $$$%% * * $ $ % * * # $ % * *## $ % * * $ $ % * * $ $ % | | * * $ $ % * * $ $ % * * # $ % * * # $ % * *## $ % * * $ $ % | 40 +-+..............*..*.$.$.%.*.*..$.$.%.*.*.#..$.%.*.*.#.$..%.*.*.#.$.%..*.*.$.$.%...............................................+-+ | * * $ $ % * * $ $ % * * # $ % * * # $ % * * # $ % * *## $ % *** $$$%%% | 20 +-+....***.$$$%%.*..*##.$.%.*.*###.$.%.*.*.#..$.%.*.*.#.$..%.*.*.#.$.%..*.*.#.$.%..................................*.*.$.$..%...+-+ | * *## $ % * * # $ % * * # $ % * * # $ % * * # $ % * * # $ % * * # $ % * *## $ % | | * * # $ % * * # $ % * * # $ % * * # $ % * * # $ % * * # $ % * * # $ % ***###$$%% ***##$$$%% * * # $ % | 0 +-+----***##$$%%-****##$$%%-***###$$%%-***##$$$%%-***##$$%%%-***##$$%%--***##$$%%-****##$$%%-***###$$%%-***##$$$%%-***##$$%%%---+-+ NUMERIC SORTSTRING SORT BITFIEFP EMULATION ASSIGNMENT IDEA HUFFMAN FOURIER NEURLU DECOMPOSITION gmean png: http://imgur.com/a/8XG5S Signed-off-by: Lluís Vilanova Reviewed-by: Emilio G. Cota Signed-off-by: Emilio G. Cota Message-id: 149915849243.6295.4484103824675839071.stgit@frigg.lan Signed-off-by: Stefan Hajnoczi --- trace-events | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/trace-events b/trace-events index bae63fdb1d..f9dbd7f509 100644 --- a/trace-events +++ b/trace-events @@ -106,7 +106,7 @@ vcpu guest_cpu_reset(void) # # Mode: user, softmmu # Targets: TCG(all) -disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d" +vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d" # @num: System call number. 
# @arg*: System call argument value. @@ -115,7 +115,7 @@ disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x # # Mode: user # Targets: TCG(all) -disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64 +vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64 # @num: System call number. # @ret: System call result value. @@ -124,4 +124,4 @@ disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint # # Mode: user # Targets: TCG(all) -disable vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64 +vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64 From 304187c51cfe1ffda1afc9b86c4ccae1cbac68cb Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 14 Jul 2017 14:31:11 +0100 Subject: [PATCH 7/7] trace: update old trace events in docs Commit c5f1ad429cdf26023cf331075a7d327708e3db6d ("block: Remove bdrv_aio_readv/writev/flush()") removed bdrv_aio_readv()/bdrv_aio_writev() so the example in the tracing documentation is no longer valid. 
Reported-by: Wang Dong Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Message-id: 20170714133111.27359-1-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi --- docs/devel/tracing.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/devel/tracing.txt b/docs/devel/tracing.txt index 8c0029beca..5768a0b7a2 100644 --- a/docs/devel/tracing.txt +++ b/docs/devel/tracing.txt @@ -14,8 +14,7 @@ for debugging, profiling, and observing execution. 2. Create a file with the events you want to trace: - echo bdrv_aio_readv > /tmp/events - echo bdrv_aio_writev >> /tmp/events + echo memory_region_ops_read >/tmp/events 3. Run the virtual machine to produce a trace file: