tcg: remove the final vestiges of dstate

Now we no longer have dynamic state affecting things we can remove the
additional fields in cpu.h and simplify the TB hash calculation.

For the benchmark:

    hyperfine -w 2 -m 20 \
      "./arm-softmmu/qemu-system-arm -cpu cortex-a15 \
        -machine type=virt,highmem=off \
        -display none -m 2048 \
        -serial mon:stdio \
        -netdev user,id=unet,hostfwd=tcp::2222-:22 \
        -device virtio-net-pci,netdev=unet \
        -device virtio-scsi-pci \
        -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf \
        -device scsi-hd,drive=hd -smp 4 \
        -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage \
        -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' \
        -snapshot"

It has a marginal effect on runtime, before:

  Time (mean ± σ):     26.279 s ±  2.438 s    [User: 41.113 s, System: 1.843 s]
  Range (min … max):   24.420 s … 32.565 s    20 runs

after:

  Time (mean ± σ):     24.440 s ±  2.885 s    [User: 34.474 s, System: 2.028 s]
  Range (min … max):   21.663 s … 29.937 s    20 runs

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1358
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20230526165401.574474-10-alex.bennee@linaro.org
Message-Id: <20230524133952.3971948-9-alex.bennee@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Alex Bennée 2023-05-26 17:53:59 +01:00 committed by Stefan Hajnoczi
parent 333df1c6c7
commit d0aaf08bb9
6 changed files with 6 additions and 26 deletions

View file

@ -175,7 +175,6 @@ struct tb_desc {
tb_page_addr_t page_addr0;
uint32_t flags;
uint32_t cflags;
uint32_t trace_vcpu_dstate;
};
static bool tb_lookup_cmp(const void *p, const void *d)
@ -187,7 +186,6 @@ static bool tb_lookup_cmp(const void *p, const void *d)
tb_page_addr0(tb) == desc->page_addr0 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
@ -228,7 +226,6 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
desc.cs_base = cs_base;
desc.flags = flags;
desc.cflags = cflags;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
if (phys_pc == -1) {
@ -236,7 +233,7 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
}
desc.page_addr0 = phys_pc;
h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
flags, cflags, *cpu->trace_dstate);
flags, cflags);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
@ -263,7 +260,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
jc->array[hash].pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
@ -282,7 +278,6 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
tb->pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}

View file

@ -61,10 +61,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
#endif /* CONFIG_SOFTMMU */
static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
uint32_t cf_mask, uint32_t trace_vcpu_dstate)
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
uint32_t flags, uint32_t cf_mask)
{
return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
}
#endif

View file

@ -50,7 +50,6 @@ static bool tb_cmp(const void *ap, const void *bp)
a->cs_base == b->cs_base &&
a->flags == b->flags &&
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
tb_page_addr0(a) == tb_page_addr0(b) &&
tb_page_addr1(a) == tb_page_addr1(b));
}
@ -888,7 +887,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb_page_addr0(tb);
h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, orig_cflags, tb->trace_vcpu_dstate);
tb->flags, orig_cflags);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
}
@ -969,7 +968,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
/* add in the hash table */
h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, tb->cflags, tb->trace_vcpu_dstate);
tb->flags, tb->cflags);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* remove TB from the page(s) if we couldn't insert it */

View file

@ -65,11 +65,6 @@
#include "internal.h"
#include "perf.h"
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
sizeof_field(TranslationBlock, trace_vcpu_dstate)
* BITS_PER_BYTE);
TBContext tb_ctx;
/*
@ -352,7 +347,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
tcg_ctx->gen_tb = tb;

View file

@ -545,9 +545,6 @@ struct TranslationBlock {
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24
/* Per-vCPU dynamic tracing state used to generate this TB */
uint32_t trace_vcpu_dstate;
/*
* Above fields used for comparing
*/

View file

@ -266,7 +266,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
struct qemu_work_item;
#define CPU_UNSET_NUMA_NODE_ID -1
#define CPU_TRACE_DSTATE_MAX_EVENTS 32
/**
* CPUState:
@ -407,10 +406,6 @@ struct CPUState {
/* Use by accel-block: CPU is executing an ioctl() */
QemuLockCnt in_ioctl_lock;
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);
#ifdef CONFIG_PLUGIN