From 732d548732edda90f3cfe0f7dceee64861f07b25 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:22 +0200 Subject: [PATCH 01/22] accel: Replace target_ulong in tlb_*() Replaces target_ulong with vaddr for guest virtual addresses in tlb_*() functions and auxilliary structs. Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-2-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/stubs/tcg-stub.c | 2 +- accel/tcg/cputlb.c | 177 +++++++++++++++++------------------ accel/tcg/tb-maint.c | 2 +- include/exec/cpu-defs.h | 4 +- include/exec/exec-all.h | 79 ++++++++-------- include/qemu/plugin-memory.h | 2 +- 6 files changed, 131 insertions(+), 135 deletions(-) diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index 813695b402..0998e601ad 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -18,7 +18,7 @@ void tb_flush(CPUState *cpu) { } -void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) +void tlb_set_dirty(CPUState *cpu, vaddr vaddr) { } diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 14ce97c33b..5caeccb52d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -427,7 +427,7 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu) } static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, - target_ulong page, target_ulong mask) + vaddr page, vaddr mask) { page &= mask; mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK; @@ -437,8 +437,7 @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, page == (tlb_entry->addr_code & mask)); } -static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, - target_ulong page) +static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page) { return tlb_hit_page_mask_anyprot(tlb_entry, page, -1); } @@ -454,8 +453,8 @@ static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) /* Called with tlb_c.lock held */ static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, - target_ulong page, - target_ulong mask) + vaddr page, + vaddr mask) { if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); @@ -464,16 +463,15 @@ static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, return false; } -static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, - target_ulong page) +static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, vaddr page) { return tlb_flush_entry_mask_locked(tlb_entry, page, -1); } /* Called with tlb_c.lock held */ static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, - target_ulong page, - target_ulong mask) + vaddr page, + vaddr mask) { CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; int k; @@ -487,21 +485,20 @@ static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, } static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, - target_ulong page) + vaddr page) { tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1); } -static void tlb_flush_page_locked(CPUArchState *env, int midx, - target_ulong page) +static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page) { - target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; - target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; + vaddr lp_addr = env_tlb(env)->d[midx].large_page_addr; + vaddr lp_mask = env_tlb(env)->d[midx].large_page_mask; /* Check if we need to flush due to large pages. */ if ((page & lp_mask) == lp_addr) { - tlb_debug("forcing full flush midx %d (" - TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + tlb_debug("forcing full flush midx %d (%" + VADDR_PRIx "/%" VADDR_PRIx ")\n", midx, lp_addr, lp_mask); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); } else { @@ -522,7 +519,7 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, * at @addr from the tlbs indicated by @idxmap from @cpu. */ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap) { CPUArchState *env = cpu->env_ptr; @@ -530,7 +527,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, assert_cpu_is_self(cpu); - tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); + tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { @@ -561,15 +558,15 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, run_on_cpu_data data) { - target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; - target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; + vaddr addr_and_idxmap = data.target_ptr; + vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK; uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); } typedef struct { - target_ulong addr; + vaddr addr; uint16_t idxmap; } TLBFlushPageByMMUIdxData; @@ -592,9 +589,9 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, g_free(d); } -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) +void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap) { - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); + tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -620,15 +617,15 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) } } -void tlb_flush_page(CPUState *cpu, target_ulong addr) +void tlb_flush_page(CPUState *cpu, vaddr addr) { tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); } -void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr, uint16_t idxmap) { - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); + tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -660,16 +657,16 @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); } -void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +void tlb_flush_page_all_cpus(CPUState *src, vaddr addr) { tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); } void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap) { - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); + tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -706,18 +703,18 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, } } -void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) +void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr) { tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); } static void tlb_flush_range_locked(CPUArchState *env, int midx, - target_ulong addr, target_ulong len, + vaddr addr, vaddr len, unsigned bits) { CPUTLBDesc *d = &env_tlb(env)->d[midx]; CPUTLBDescFast *f = &env_tlb(env)->f[midx]; - target_ulong mask = MAKE_64BIT_MASK(0, bits); + vaddr mask = MAKE_64BIT_MASK(0, bits); /* * If @bits is smaller than the tlb size, there may be multiple entries @@ -731,7 +728,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, */ if (mask < f->mask || len > f->mask) { tlb_debug("forcing full flush midx %d (" - TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n", + "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n", midx, addr, mask, len); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); return; @@ -744,14 +741,14 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, */ if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { tlb_debug("forcing full flush midx %d (" - TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + "%" VADDR_PRIx "/%" VADDR_PRIx ")\n", midx, d->large_page_addr, d->large_page_mask); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); return; } - for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) { - target_ulong page = addr + i; + for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) { + vaddr page = addr + i; CPUTLBEntry *entry = tlb_entry(env, midx, page); if (tlb_flush_entry_mask_locked(entry, page, mask)) { @@ -762,8 +759,8 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, } typedef struct { - target_ulong addr; - target_ulong len; + vaddr addr; + vaddr len; uint16_t idxmap; uint16_t bits; } TLBFlushRangeData; @@ -776,7 +773,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, assert_cpu_is_self(cpu); - tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n", + tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n", d.addr, d.bits, d.len, d.idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); @@ -801,7 +798,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, * overlap the flushed pages, which includes the previous. */ d.addr -= TARGET_PAGE_SIZE; - for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) { + for (vaddr i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) { tb_jmp_cache_clear_page(cpu, d.addr); d.addr += TARGET_PAGE_SIZE; } @@ -815,8 +812,8 @@ static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu, g_free(d); } -void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, - target_ulong len, uint16_t idxmap, +void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits) { TLBFlushRangeData d; @@ -851,14 +848,14 @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, } } -void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits) { tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits); } void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu, - target_ulong addr, target_ulong len, + vaddr addr, vaddr len, uint16_t idxmap, unsigned bits) { TLBFlushRangeData d; @@ -898,16 +895,16 @@ void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu, } void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, - target_ulong addr, - uint16_t idxmap, unsigned bits) + vaddr addr, uint16_t idxmap, + unsigned bits) { tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE, idxmap, bits); } void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, - target_ulong addr, - target_ulong len, + vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits) { @@ -949,7 +946,7 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, } void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap, unsigned bits) { @@ -1055,32 +1052,32 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) /* Called with tlb_c.lock held */ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, - target_ulong vaddr) + vaddr addr) { - if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { - tlb_entry->addr_write = vaddr; + if (tlb_entry->addr_write == (addr | TLB_NOTDIRTY)) { + tlb_entry->addr_write = addr; } } /* update the TLB corresponding to virtual page vaddr so that it is no longer dirty */ -void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) +void tlb_set_dirty(CPUState *cpu, vaddr addr) { CPUArchState *env = cpu->env_ptr; int mmu_idx; assert_cpu_is_self(cpu); - vaddr &= TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); + tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, addr), addr); } for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); + tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], addr); } } qemu_spin_unlock(&env_tlb(env)->c.lock); @@ -1089,20 +1086,20 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) /* Our TLB does not support large pages, so remember the area covered by large pages and trigger a full TLB flush if these are invalidated. */ static void tlb_add_large_page(CPUArchState *env, int mmu_idx, - target_ulong vaddr, target_ulong size) + vaddr addr, uint64_t size) { - target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; - target_ulong lp_mask = ~(size - 1); + vaddr lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; + vaddr lp_mask = ~(size - 1); - if (lp_addr == (target_ulong)-1) { + if (lp_addr == (vaddr)-1) { /* No previous large page. */ - lp_addr = vaddr; + lp_addr = addr; } else { /* Extend the existing region to include the new page. This is a compromise between unnecessary flushes and the cost of maintaining a full variable size TLB. */ lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; - while (((lp_addr ^ vaddr) & lp_mask) != 0) { + while (((lp_addr ^ addr) & lp_mask) != 0) { lp_mask <<= 1; } } @@ -1119,19 +1116,19 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx, * critical section. */ void tlb_set_page_full(CPUState *cpu, int mmu_idx, - target_ulong vaddr, CPUTLBEntryFull *full) + vaddr addr, CPUTLBEntryFull *full) { CPUArchState *env = cpu->env_ptr; CPUTLB *tlb = env_tlb(env); CPUTLBDesc *desc = &tlb->d[mmu_idx]; MemoryRegionSection *section; unsigned int index; - target_ulong address; - target_ulong write_address; + vaddr address; + vaddr write_address; uintptr_t addend; CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; - target_ulong vaddr_page; + vaddr addr_page; int asidx, wp_flags, prot; bool is_ram, is_romd; @@ -1141,9 +1138,9 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, sz = TARGET_PAGE_SIZE; } else { sz = (hwaddr)1 << full->lg_page_size; - tlb_add_large_page(env, mmu_idx, vaddr, sz); + tlb_add_large_page(env, mmu_idx, addr, sz); } - vaddr_page = vaddr & TARGET_PAGE_MASK; + addr_page = addr & TARGET_PAGE_MASK; paddr_page = full->phys_addr & TARGET_PAGE_MASK; prot = full->prot; @@ -1152,11 +1149,11 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, &xlat, &sz, full->attrs, &prot); assert(sz >= TARGET_PAGE_SIZE); - tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" HWADDR_FMT_plx + tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx " prot=%x idx=%d\n", - vaddr, full->phys_addr, prot, mmu_idx); + addr, full->phys_addr, prot, mmu_idx); - address = vaddr_page; + address = addr_page; if (full->lg_page_size < TARGET_PAGE_BITS) { /* Repeat the MMU check and TLB fill on every access. */ address |= TLB_INVALID_MASK; @@ -1204,11 +1201,11 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, } } - wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, + wp_flags = cpu_watchpoint_address_matches(cpu, addr_page, TARGET_PAGE_SIZE); - index = tlb_index(env, mmu_idx, vaddr_page); - te = tlb_entry(env, mmu_idx, vaddr_page); + index = tlb_index(env, mmu_idx, addr_page); + te = tlb_entry(env, mmu_idx, addr_page); /* * Hold the TLB lock for the rest of the function. We could acquire/release @@ -1223,13 +1220,13 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, tlb->c.dirty |= 1 << mmu_idx; /* Make sure there's no cached translation for the new page. */ - tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); + tlb_flush_vtlb_page_locked(env, mmu_idx, addr_page); /* * Only evict the old entry to the victim tlb if it's for a * different page; otherwise just overwrite the stale data. */ - if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { + if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) { unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; CPUTLBEntry *tv = &desc->vtable[vidx]; @@ -1253,11 +1250,11 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). */ desc->fulltlb[index] = *full; - desc->fulltlb[index].xlat_section = iotlb - vaddr_page; + desc->fulltlb[index].xlat_section = iotlb - addr_page; desc->fulltlb[index].phys_addr = paddr_page; /* Now calculate the new entry */ - tn.addend = addend - vaddr_page; + tn.addend = addend - addr_page; if (prot & PAGE_READ) { tn.addr_read = address; if (wp_flags & BP_MEM_READ) { @@ -1289,9 +1286,9 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, qemu_spin_unlock(&tlb->c.lock); } -void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, +void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr, hwaddr paddr, MemTxAttrs attrs, int prot, - int mmu_idx, target_ulong size) + int mmu_idx, uint64_t size) { CPUTLBEntryFull full = { .phys_addr = paddr, @@ -1301,14 +1298,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, }; assert(is_power_of_2(size)); - tlb_set_page_full(cpu, mmu_idx, vaddr, &full); + tlb_set_page_full(cpu, mmu_idx, addr, &full); } -void tlb_set_page(CPUState *cpu, target_ulong vaddr, +void tlb_set_page(CPUState *cpu, vaddr addr, hwaddr paddr, int prot, - int mmu_idx, target_ulong size) + int mmu_idx, uint64_t size) { - tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, + tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED, prot, mmu_idx, size); } @@ -1317,7 +1314,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must * be discarded and looked up again (e.g. via tlb_entry()). */ -static void tlb_fill(CPUState *cpu, target_ulong addr, int size, +static void tlb_fill(CPUState *cpu, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { bool ok; @@ -1357,7 +1354,7 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, } static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, - int mmu_idx, target_ulong addr, uintptr_t retaddr, + int mmu_idx, vaddr addr, uintptr_t retaddr, MMUAccessType access_type, MemOp op) { CPUState *cpu = env_cpu(env); @@ -1407,7 +1404,7 @@ static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section, } static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, - int mmu_idx, uint64_t val, target_ulong addr, + int mmu_idx, uint64_t val, vaddr addr, uintptr_t retaddr, MemOp op) { CPUState *cpu = env_cpu(env); @@ -1449,7 +1446,7 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, /* Return true if ADDR is present in the victim tlb, and has been copied back to the main tlb. */ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, - MMUAccessType access_type, target_ulong page) + MMUAccessType access_type, vaddr page) { size_t vidx; @@ -1691,13 +1688,13 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, * from the same thread (which a mem callback will be) this is safe. */ -bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, +bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx, bool is_store, struct qemu_plugin_hwaddr *data) { CPUArchState *env = cpu->env_ptr; CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); uintptr_t index = tlb_index(env, mmu_idx, addr); - target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; + vaddr tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; if (likely(tlb_hit(tlb_addr, addr))) { /* We must have an iotlb entry for MMIO */ diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index 892eecda2d..3541419845 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -98,7 +98,7 @@ static void tb_remove_all(void) /* Call with mmap_lock held. */ static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2) { - target_ulong addr; + vaddr addr; int flags; assert_memory_lock(); diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 4cb77c8dec..e6a079402e 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -147,8 +147,8 @@ typedef struct CPUTLBDesc { * we must flush the entire tlb. The region is matched if * (addr & large_page_mask) == large_page_addr. */ - target_ulong large_page_addr; - target_ulong large_page_mask; + vaddr large_page_addr; + vaddr large_page_mask; /* host time (in ns) at the beginning of the time window */ int64_t window_begin_ns; /* maximum number of entries observed in the window */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 698943d58f..f5508e242b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -94,7 +94,7 @@ void tlb_destroy(CPUState *cpu); * Flush one page from the TLB of the specified CPU, for all * MMU indexes. */ -void tlb_flush_page(CPUState *cpu, target_ulong addr); +void tlb_flush_page(CPUState *cpu, vaddr addr); /** * tlb_flush_page_all_cpus: * @cpu: src CPU of the flush @@ -103,7 +103,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr); * Flush one page from the TLB of the specified CPU, for all * MMU indexes. */ -void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr); +void tlb_flush_page_all_cpus(CPUState *src, vaddr addr); /** * tlb_flush_page_all_cpus_synced: * @cpu: src CPU of the flush @@ -115,7 +115,7 @@ void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr); * the source vCPUs safe work is complete. This will depend on when * the guests translation ends the TB. */ -void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr); +void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr); /** * tlb_flush: * @cpu: CPU whose TLB should be flushed @@ -150,7 +150,7 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu); * Flush one page from the TLB of the specified CPU, for the specified * MMU indexes. */ -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, +void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap); /** * tlb_flush_page_by_mmuidx_all_cpus: @@ -161,7 +161,7 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, * Flush one page from the TLB of all CPUs, for the specified * MMU indexes. */ -void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr, uint16_t idxmap); /** * tlb_flush_page_by_mmuidx_all_cpus_synced: @@ -175,7 +175,7 @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, * complete once the source vCPUs safe work is complete. This will * depend on when the guests translation ends the TB. */ -void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, +void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr, uint16_t idxmap); /** * tlb_flush_by_mmuidx: @@ -218,14 +218,14 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap); * * Similar to tlb_flush_page_mask, but with a bitmap of indexes. */ -void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits); /* Similarly, with broadcast and syncing. */ -void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits); void tlb_flush_page_bits_by_mmuidx_all_cpus_synced - (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits); + (CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits); /** * tlb_flush_range_by_mmuidx @@ -238,17 +238,17 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len), * comparing only the low @bits worth of each virtual page. */ -void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, - target_ulong len, uint16_t idxmap, +void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits); /* Similarly, with broadcast and syncing. */ -void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, - target_ulong len, uint16_t idxmap, +void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits); void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, - target_ulong addr, - target_ulong len, + vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits); @@ -256,7 +256,7 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, * tlb_set_page_full: * @cpu: CPU context * @mmu_idx: mmu index of the tlb to modify - * @vaddr: virtual address of the entry to add + * @addr: virtual address of the entry to add * @full: the details of the tlb entry * * Add an entry to @cpu tlb index @mmu_idx. All of the fields of @@ -271,13 +271,13 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only * used by tlb_flush_page. */ -void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr, +void tlb_set_page_full(CPUState *cpu, int mmu_idx, vaddr addr, CPUTLBEntryFull *full); /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for - * @vaddr: virtual address of page to add entry for + * @addr: virtual address of page to add entry for * @paddr: physical address of the page * @attrs: memory transaction attributes * @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits) @@ -285,7 +285,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr, * @size: size of the page in bytes * * Add an entry to this CPU's TLB (a mapping from virtual address - * @vaddr to physical address @paddr) with the specified memory + * @addr to physical address @paddr) with the specified memory * transaction attributes. This is generally called by the target CPU * specific code after it has been called through the tlb_fill() * entry point and performed a successful page table walk to find @@ -296,18 +296,18 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr, * single TARGET_PAGE_SIZE region is mapped; the supplied @size is only * used by tlb_flush_page. */ -void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, +void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr, hwaddr paddr, MemTxAttrs attrs, - int prot, int mmu_idx, target_ulong size); + int prot, int mmu_idx, vaddr size); /* tlb_set_page: * * This function is equivalent to calling tlb_set_page_with_attrs() * with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided * as a convenience for CPUs which don't use memory transaction attributes. */ -void tlb_set_page(CPUState *cpu, target_ulong vaddr, +void tlb_set_page(CPUState *cpu, vaddr addr, hwaddr paddr, int prot, - int mmu_idx, target_ulong size); + int mmu_idx, vaddr size); #else static inline void tlb_init(CPUState *cpu) { @@ -315,14 +315,13 @@ static inline void tlb_init(CPUState *cpu) static inline void tlb_destroy(CPUState *cpu) { } -static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) +static inline void tlb_flush_page(CPUState *cpu, vaddr addr) { } -static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +static inline void tlb_flush_page_all_cpus(CPUState *src, vaddr addr) { } -static inline void tlb_flush_page_all_cpus_synced(CPUState *src, - target_ulong addr) +static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr) { } static inline void tlb_flush(CPUState *cpu) @@ -335,7 +334,7 @@ static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu) { } static inline void tlb_flush_page_by_mmuidx(CPUState *cpu, - target_ulong addr, uint16_t idxmap) + vaddr addr, uint16_t idxmap) { } @@ -343,12 +342,12 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) { } static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap) { } static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap) { } @@ -361,37 +360,37 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, { } static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap, unsigned bits) { } static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, - target_ulong addr, + vaddr addr, uint16_t idxmap, unsigned bits) { } static inline void -tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, +tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits) { } -static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, - target_ulong len, uint16_t idxmap, +static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits) { } static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, - target_ulong addr, - target_ulong len, + vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits) { } static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, - target_ulong addr, - target_long len, + vaddr addr, + vaddr len, uint16_t idxmap, unsigned bits) { @@ -663,7 +662,7 @@ static inline void mmap_lock(void) {} static inline void mmap_unlock(void) {} void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length); -void tlb_set_dirty(CPUState *cpu, target_ulong vaddr); +void tlb_set_dirty(CPUState *cpu, vaddr addr); MemoryRegionSection * address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h index 6fd539022a..43165f2452 100644 --- a/include/qemu/plugin-memory.h +++ b/include/qemu/plugin-memory.h @@ -37,7 +37,7 @@ struct qemu_plugin_hwaddr { * It would only fail if not called from an instrumented memory access * which would be an abuse of the API. */ -bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, +bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx, bool is_store, struct qemu_plugin_hwaddr *data); #endif /* PLUGIN_MEMORY_H */ From 256d11f9ba23c74db75d50b24e3357d583d7885b Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:23 +0200 Subject: [PATCH 02/22] accel/tcg/translate-all.c: Widen pc and cs_base Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-3-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/internal.h | 6 +++--- accel/tcg/translate-all.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 65380ccb42..91f308bdfa 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -42,8 +42,8 @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr, G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); #endif /* CONFIG_SOFTMMU */ -TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags, +TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc, + uint64_t cs_base, uint32_t flags, int cflags); void page_init(void); void tb_htable_init(void); @@ -55,7 +55,7 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t host_pc); /* Return the current PC from CPU, which may be cached in TB. */ -static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) +static inline vaddr log_pc(CPUState *cpu, const TranslationBlock *tb) { if (tb_cflags(tb) & CF_PCREL) { return cpu->cc->get_pc(cpu); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index c4d081f5ad..03caf62459 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -274,7 +274,7 @@ void page_init(void) * Return the size of the generated code, or negative on error. */ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, - target_ulong pc, void *host_pc, + vaddr pc, void *host_pc, int *max_insns, int64_t *ti) { int ret = sigsetjmp(tcg_ctx->jmp_trans, 0); @@ -302,7 +302,7 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, /* Called with mmap_lock held for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, - target_ulong pc, target_ulong cs_base, + vaddr pc, uint64_t cs_base, uint32_t flags, int cflags) { CPUArchState *env = cpu->env_ptr; @@ -634,10 +634,10 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; if (qemu_loglevel_mask(CPU_LOG_EXEC)) { - target_ulong pc = log_pc(cpu, tb); + vaddr pc = log_pc(cpu, tb); if (qemu_log_in_addr_range(pc)) { - qemu_log("cpu_io_recompile: rewound execution of TB to " - TARGET_FMT_lx "\n", pc); + qemu_log("cpu_io_recompile: rewound execution of TB to %" + VADDR_PRIx "\n", pc); } } From bb5de52524c6c4b7da5623c5b19d9d6dc8405aa0 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:24 +0200 Subject: [PATCH 03/22] target: Widen pc/cs_base in cpu_get_tb_cpu_state Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-4-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 9 ++++++--- accel/tcg/translate-all.c | 3 ++- target/alpha/cpu.h | 4 ++-- target/arm/cpu.h | 4 ++-- target/arm/helper.c | 4 ++-- target/avr/cpu.h | 4 ++-- target/cris/cpu.h | 4 ++-- target/hexagon/cpu.h | 4 ++-- target/hppa/cpu.h | 5 ++--- target/i386/cpu.h | 4 ++-- target/loongarch/cpu.h | 6 ++---- target/m68k/cpu.h | 4 ++-- target/microblaze/cpu.h | 4 ++-- target/mips/cpu.h | 4 ++-- target/nios2/cpu.h | 4 ++-- target/openrisc/cpu.h | 5 ++--- target/ppc/cpu.h | 8 ++++---- target/ppc/helper_regs.c | 4 ++-- target/riscv/cpu.h | 4 ++-- target/riscv/cpu_helper.c | 4 ++-- target/rx/cpu.h | 4 ++-- target/s390x/cpu.h | 4 ++-- target/sh4/cpu.h | 4 ++-- target/sparc/cpu.h | 4 ++-- target/tricore/cpu.h | 4 ++-- target/xtensa/cpu.h | 4 ++-- 26 files changed, 58 insertions(+), 58 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 179847b294..4d952a6cc2 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -408,7 +408,8 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) { CPUState *cpu = env_cpu(env); TranslationBlock *tb; - target_ulong cs_base, pc; + vaddr pc; + uint64_t cs_base; uint32_t flags, cflags; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); @@ -529,7 +530,8 @@ void cpu_exec_step_atomic(CPUState *cpu) { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb; - target_ulong cs_base, pc; + vaddr pc; + uint64_t cs_base; uint32_t flags, cflags; int tb_exit; @@ -942,7 +944,8 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc) while (!cpu_handle_interrupt(cpu, &last_tb)) { TranslationBlock *tb; - target_ulong cs_base, pc; + vaddr pc; + uint64_t cs_base; uint32_t flags, cflags; cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 03caf62459..03c49baf1c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -580,7 +580,8 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) /* The exception probably happened in a helper. The CPU state should have been saved before calling it. Fetch the PC from there. */ CPUArchState *env = cpu->env_ptr; - target_ulong pc, cs_base; + vaddr pc; + uint64_t cs_base; tb_page_addr_t addr; uint32_t flags; diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index 5e67304d81..fcd20bfd3a 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -462,8 +462,8 @@ void alpha_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MemTxResult response, uintptr_t retaddr); #endif -static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags) +static inline void cpu_get_tb_cpu_state(CPUAlphaState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { *pc = env->pc; *cs_base = 0; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 11c3850ad9..00e675f58f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3220,8 +3220,8 @@ static inline bool arm_cpu_bswap_data(CPUARMState *env) } #endif -void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags); +void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags); enum { QEMU_PSCI_CONDUIT_DISABLED = 0, diff --git a/target/arm/helper.c b/target/arm/helper.c index 323cadd3c8..d08c058e42 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -11945,8 +11945,8 @@ static bool mve_no_pred(CPUARMState *env) return true; } -void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags) +void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { CPUARMTBFlags flags; diff --git a/target/avr/cpu.h b/target/avr/cpu.h index f19dd72926..7225174668 100644 --- a/target/avr/cpu.h +++ b/target/avr/cpu.h @@ -190,8 +190,8 @@ enum { TB_FLAGS_SKIP = 2, }; -static inline void cpu_get_tb_cpu_state(CPUAVRState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags) +static inline void cpu_get_tb_cpu_state(CPUAVRState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { uint32_t flags = 0; diff --git a/target/cris/cpu.h b/target/cris/cpu.h index 71fa1f96e0..8e37c6e50d 100644 --- a/target/cris/cpu.h +++ b/target/cris/cpu.h @@ -266,8 +266,8 @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch) #include "exec/cpu-all.h" -static inline void cpu_get_tb_cpu_state(CPUCRISState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUCRISState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *cs_base = 0; diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index bfcb1057dd..daef5c3f00 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -153,8 +153,8 @@ struct ArchCPU { FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) -static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { uint32_t hex_flags = 0; *pc = env->gpr[HEX_REG_PC]; diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index b595ef25a9..7373177b55 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -268,9 +268,8 @@ static inline target_ulong hppa_form_gva(CPUHPPAState *env, uint64_t spc, #define TB_FLAG_PRIV_SHIFT 8 #define TB_FLAG_UNALIGN 0x400 -static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, target_ulong *pc, - target_ulong *cs_base, - uint32_t *pflags) +static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { uint32_t flags = env->psw_n * PSW_N; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index cd047e0410..2c9b0d2ebc 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2275,8 +2275,8 @@ static inline int cpu_mmu_index_kernel(CPUX86State *env) #include "hw/i386/apic.h" #endif -static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *cs_base = env->segs[R_CS].base; *pc = *cs_base + env->eip; diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index b23f38c3d5..ed04027af1 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -427,10 +427,8 @@ static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch) #define HW_FLAGS_EUEN_FPE 0x04 #define HW_FLAGS_EUEN_SXE 0x08 -static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, - target_ulong *pc, - target_ulong *cs_base, - uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *cs_base = 0; diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h index 048d5aae2b..cf70282717 100644 --- a/target/m68k/cpu.h +++ b/target/m68k/cpu.h @@ -601,8 +601,8 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, #define TB_FLAGS_TRACE 16 #define TB_FLAGS_TRACE_BIT (1 << TB_FLAGS_TRACE) -static inline void cpu_get_tb_cpu_state(CPUM68KState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUM68KState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *cs_base = 0; diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index 88324d0bc1..3525de144c 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -401,8 +401,8 @@ void mb_tcg_init(void); /* Ensure there is no overlap between the two masks. */ QEMU_BUILD_BUG_ON(MSR_TB_MASK & IFLAGS_TB_MASK); -static inline void cpu_get_tb_cpu_state(CPUMBState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUMBState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *flags = (env->iflags & IFLAGS_TB_MASK) | (env->msr & MSR_TB_MASK); diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 142c55af47..a3bc646976 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -1313,8 +1313,8 @@ void itc_reconfigure(struct MIPSITUState *tag); /* helper.c */ target_ulong exception_resume_pc(CPUMIPSState *env); -static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->active_tc.PC; *cs_base = 0; diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h index 20042c4332..477a3161fd 100644 --- a/target/nios2/cpu.h +++ b/target/nios2/cpu.h @@ -302,8 +302,8 @@ FIELD(TBFLAGS, CRS0, 0, 1) /* Set if CRS == 0. */ FIELD(TBFLAGS, U, 1, 1) /* Overlaps CR_STATUS_U */ FIELD(TBFLAGS, R0_0, 2, 1) /* Set if R0 == 0. */ -static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUNios2State *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { unsigned crs = FIELD_EX32(env->ctrl[CR_STATUS], CR_STATUS, CRS); diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index f16e8b3274..92c38f54c2 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -367,9 +367,8 @@ static inline void cpu_set_gpr(CPUOpenRISCState *env, int i, uint32_t val) env->shadow_gpr[0][i] = val; } -static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env, - target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *cs_base = 0; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 4138a25801..94497aa115 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2508,11 +2508,11 @@ void cpu_write_xer(CPUPPCState *env, target_ulong xer); #define is_book3s_arch2x(ctx) (!!((ctx)->insns_flags & PPC_SEGMENT_64B)) #ifdef CONFIG_DEBUG_TCG -void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags); +void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags); #else -static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->nip; *cs_base = 0; diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index e27f4a75a4..f380342d4d 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -218,8 +218,8 @@ void hreg_update_pmu_hflags(CPUPPCState *env) } #ifdef CONFIG_DEBUG_TCG -void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +void cpu_get_tb_cpu_state(CPUPPCState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { uint32_t hflags_current = env->hflags; uint32_t hflags_rebuilt; diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index e3e08d315f..7bff1d47f6 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -587,8 +587,8 @@ static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) return cpu->cfg.vlen >> (sew + 3 - lmul); } -void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags); +void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags); void riscv_cpu_update_mask(CPURISCVState *env); diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 90cef9856d..a944f25694 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -61,8 +61,8 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch) #endif } -void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags) +void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { CPUState *cs = env_cpu(env); RISCVCPU *cpu = RISCV_CPU(cs); diff --git a/target/rx/cpu.h b/target/rx/cpu.h index 555d230f24..7f03ffcfed 100644 --- a/target/rx/cpu.h +++ b/target/rx/cpu.h @@ -143,8 +143,8 @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte); #define RX_CPU_IRQ 0 #define RX_CPU_FIR 1 -static inline void cpu_get_tb_cpu_state(CPURXState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPURXState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *cs_base = 0; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index f130c29f83..eb5b65b7d3 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -378,8 +378,8 @@ static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch) #endif } -static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { if (env->psw.addr & 1) { /* diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 02bfd612ea..1399d3840f 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -368,8 +368,8 @@ static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T)); } -static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUSH4State *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; /* For a gUSA region, notice the end of the region. */ diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 3d090e8278..95d2d0da71 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -762,8 +762,8 @@ trap_state* cpu_tsptr(CPUSPARCState* env); #define TB_FLAG_HYPER (1 << 7) #define TB_FLAG_ASI_SHIFT 24 -static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags) +static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { uint32_t flags; *pc = env->pc; diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h index 257fcf3cee..a50b91cc36 100644 --- a/target/tricore/cpu.h +++ b/target/tricore/cpu.h @@ -384,8 +384,8 @@ FIELD(TB_FLAGS, PRIV, 0, 2) void cpu_state_reset(CPUTriCoreState *s); void tricore_tcg_init(void); -static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUTriCoreState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { uint32_t new_flags = 0; *pc = env->PC; diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h index b7a54711a6..87fe992ba6 100644 --- a/target/xtensa/cpu.h +++ b/target/xtensa/cpu.h @@ -727,8 +727,8 @@ static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch) #include "exec/cpu-all.h" -static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { *pc = env->pc; *cs_base = 0; From 9e39de980fdf0dbb083a5c78b71bf2c1bdfa0499 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:25 +0200 Subject: [PATCH 04/22] accel/tcg/cputlb.c: Widen CPUTLBEntry access functions Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-5-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 8 ++++---- include/exec/cpu_ldst.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 5caeccb52d..ac990a1526 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1453,7 +1453,7 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, assert_cpu_is_self(env_cpu(env)); for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; - target_ulong cmp = tlb_read_idx(vtlb, access_type); + uint64_t cmp = tlb_read_idx(vtlb, access_type); if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. */ @@ -1507,7 +1507,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, { uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_read_idx(entry, access_type); + uint64_t tlb_addr = tlb_read_idx(entry, access_type); target_ulong page_addr = addr & TARGET_PAGE_MASK; int flags = TLB_FLAGS_MASK; @@ -1694,7 +1694,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx, CPUArchState *env = cpu->env_ptr; CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); uintptr_t index = tlb_index(env, mmu_idx, addr); - vaddr tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; + uint64_t tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; if (likely(tlb_hit(tlb_addr, addr))) { /* We must have an iotlb entry for MMIO */ @@ -1759,7 +1759,7 @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, target_ulong addr = data->addr; uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_read_idx(entry, access_type); + uint64_t tlb_addr = tlb_read_idx(entry, access_type); bool maybe_resized = false; /* If the TLB entry is for a different page, reload and try again. */ diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 896f305ff3..645476f0e5 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -328,8 +328,8 @@ static inline void clear_helper_retaddr(void) #include "tcg/oversized-guest.h" -static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, - MMUAccessType access_type) +static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry, + MMUAccessType access_type) { /* Do not rearrange the CPUTLBEntry structure members. */ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) != @@ -355,14 +355,14 @@ static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, #endif } -static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) +static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry) { return tlb_read_idx(entry, MMU_DATA_STORE); } /* Find the TLB index corresponding to the mmu_idx + address pair. */ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, - target_ulong addr) + vaddr addr) { uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS; @@ -371,7 +371,7 @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, /* Find the TLB entry corresponding to the mmu_idx + address pair. */ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, - target_ulong addr) + vaddr addr) { return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; } From fb2c53cb710e3f3a4c6d4bb6f2465295e216b835 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:26 +0200 Subject: [PATCH 05/22] accel/tcg/cputlb.c: Widen addr in MMULookupPageData Functions accessing MMULookupPageData are also updated. Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-6-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index ac990a1526..cc53d0fb64 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1729,7 +1729,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx, typedef struct MMULookupPageData { CPUTLBEntryFull *full; void *haddr; - target_ulong addr; + vaddr addr; int flags; int size; } MMULookupPageData; @@ -1756,7 +1756,7 @@ typedef struct MMULookupLocals { static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, int mmu_idx, MMUAccessType access_type, uintptr_t ra) { - target_ulong addr = data->addr; + vaddr addr = data->addr; uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); uint64_t tlb_addr = tlb_read_idx(entry, access_type); @@ -1796,7 +1796,7 @@ static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data, MMUAccessType access_type, uintptr_t ra) { CPUTLBEntryFull *full = data->full; - target_ulong addr = data->addr; + vaddr addr = data->addr; int flags = data->flags; int size = data->size; @@ -1827,7 +1827,7 @@ static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data, * Resolve the translation for the page(s) beginning at @addr, for MemOp.size * bytes. Return true if the lookup crosses a page boundary. */ -static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi, +static bool mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra, MMUAccessType type, MMULookupLocals *l) { unsigned a_bits; @@ -2024,7 +2024,7 @@ static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p, MMUAccessType type, uintptr_t ra) { CPUTLBEntryFull *full = p->full; - target_ulong addr = p->addr; + vaddr addr = p->addr; int i, size = p->size; QEMU_IOTHREAD_LOCK_GUARD(); @@ -2333,7 +2333,7 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx, return ret; } -static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, +static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra, MMUAccessType access_type) { MMULookupLocals l; @@ -2352,7 +2352,7 @@ tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr, return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } -static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, +static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra, MMUAccessType access_type) { MMULookupLocals l; @@ -2383,7 +2383,7 @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr, return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } -static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, +static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra, MMUAccessType access_type) { MMULookupLocals l; @@ -2410,7 +2410,7 @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr, return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD); } -static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi, +static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra, MMUAccessType access_type) { MMULookupLocals l; @@ -2460,7 +2460,7 @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr, return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr); } -static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr, +static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra) { MMULookupLocals l; @@ -2617,7 +2617,7 @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p, uint64_t val_le, int mmu_idx, uintptr_t ra) { CPUTLBEntryFull *full = p->full; - target_ulong addr = p->addr; + vaddr addr = p->addr; int i, size = p->size; QEMU_IOTHREAD_LOCK_GUARD(); @@ -2808,7 +2808,7 @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val, do_st_1(env, &l.page[0], val, l.mmu_idx, ra); } -static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val, +static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val, MemOpIdx oi, uintptr_t ra) { MMULookupLocals l; @@ -2837,7 +2837,7 @@ void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val, do_st2_mmu(env, addr, val, oi, retaddr); } -static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val, +static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val, MemOpIdx oi, uintptr_t ra) { MMULookupLocals l; @@ -2864,7 +2864,7 @@ void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val, do_st4_mmu(env, addr, val, oi, retaddr); } -static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val, +static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val, MemOpIdx oi, uintptr_t ra) { MMULookupLocals l; @@ -2891,7 +2891,7 @@ void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val, do_st8_mmu(env, addr, val, oi, retaddr); } -static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val, +static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, MemOpIdx oi, uintptr_t ra) { MMULookupLocals l; From f0a08b0913befbd400c16fb444612b6d034a2c53 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:27 +0200 Subject: [PATCH 06/22] accel/tcg/cpu-exec.c: Widen pc to vaddr Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-7-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 4d952a6cc2..ba1890a373 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -169,8 +169,8 @@ uint32_t curr_cflags(CPUState *cpu) } struct tb_desc { - target_ulong pc; - target_ulong cs_base; + vaddr pc; + uint64_t cs_base; CPUArchState *env; tb_page_addr_t page_addr0; uint32_t flags; @@ -193,7 +193,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) return true; } else { tb_page_addr_t phys_page1; - target_ulong virt_page1; + vaddr virt_page1; /* * We know that the first page matched, and an otherwise valid TB @@ -214,8 +214,8 @@ static bool tb_lookup_cmp(const void *p, const void *d) return false; } -static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags, +static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc, + uint64_t cs_base, uint32_t flags, uint32_t cflags) { tb_page_addr_t phys_pc; @@ -238,9 +238,9 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, } /* Might cause an exception, so have a longjmp destination ready */ -static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, - uint32_t flags, uint32_t cflags) +static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc, + uint64_t cs_base, uint32_t flags, + uint32_t cflags) { TranslationBlock *tb; CPUJumpCache *jc; @@ -292,13 +292,13 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, return tb; } -static void log_cpu_exec(target_ulong pc, CPUState *cpu, +static void log_cpu_exec(vaddr pc, CPUState *cpu, const TranslationBlock *tb) { if (qemu_log_in_addr_range(pc)) { qemu_log_mask(CPU_LOG_EXEC, "Trace %d: %p [%08" PRIx64 - "/" TARGET_FMT_lx "/%08x/%08x] %s\n", + "/%" VADDR_PRIx "/%08x/%08x] %s\n", cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, tb->flags, tb->cflags, lookup_symbol(pc)); @@ -323,7 +323,7 @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu, } } -static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc, +static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc, uint32_t *cflags) { CPUBreakpoint *bp; @@ -389,7 +389,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc, return false; } -static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc, +static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc, uint32_t *cflags) { return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) && @@ -485,10 +485,10 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) cc->set_pc(cpu, last_tb->pc); } if (qemu_loglevel_mask(CPU_LOG_EXEC)) { - target_ulong pc = log_pc(cpu, last_tb); + vaddr pc = log_pc(cpu, last_tb); if (qemu_log_in_addr_range(pc)) { - qemu_log("Stopped execution of TB chain before %p [" - TARGET_FMT_lx "] %s\n", + qemu_log("Stopped execution of TB chain before %p [%" + VADDR_PRIx "] %s\n", last_tb->tc.ptr, pc, lookup_symbol(pc)); } } @@ -882,8 +882,8 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, - target_ulong pc, - TranslationBlock **last_tb, int *tb_exit) + vaddr pc, TranslationBlock **last_tb, + int *tb_exit) { int32_t insns_left; From 06f3831c08ac0d36ce3d0b1f2deaa8ec1e1d8d7e Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:28 +0200 Subject: [PATCH 07/22] accel/tcg: Widen pc to vaddr in CPUJumpCache Related functions dealing with the jump cache are also updated. Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-8-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 2 +- accel/tcg/tb-hash.h | 12 ++++++------ accel/tcg/tb-jmp-cache.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index cc53d0fb64..bdf400f6e6 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -99,7 +99,7 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, desc->window_max_entries = max_entries; } -static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) +static void tb_jmp_cache_clear_page(CPUState *cpu, vaddr page_addr) { CPUJumpCache *jc = cpu->tb_jmp_cache; int i, i0; diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h index 2ba2193731..a0c61f25cd 100644 --- a/accel/tcg/tb-hash.h +++ b/accel/tcg/tb-hash.h @@ -35,16 +35,16 @@ #define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1) #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE) -static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc) +static inline unsigned int tb_jmp_cache_hash_page(vaddr pc) { - target_ulong tmp; + vaddr tmp; tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK; } -static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +static inline unsigned int tb_jmp_cache_hash_func(vaddr pc) { - target_ulong tmp; + vaddr tmp; tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK) | (tmp & TB_JMP_ADDR_MASK)); @@ -53,7 +53,7 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) #else /* In user-mode we can get better hashing because we do not have a TLB */ -static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +static inline unsigned int tb_jmp_cache_hash_func(vaddr pc) { return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1); } @@ -61,7 +61,7 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) #endif /* CONFIG_SOFTMMU */ static inline -uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, +uint32_t tb_hash_func(tb_page_addr_t phys_pc, vaddr pc, uint32_t flags, uint64_t flags2, uint32_t cf_mask) { return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask); diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h index bee87eb840..bb424c8a05 100644 --- a/accel/tcg/tb-jmp-cache.h +++ b/accel/tcg/tb-jmp-cache.h @@ -21,7 +21,7 @@ struct CPUJumpCache { struct rcu_head rcu; struct { TranslationBlock *tb; - target_ulong pc; + vaddr pc; } array[TB_JMP_CACHE_SIZE]; }; From 4f8f41272eec57105e82dbb5761354ef0da9f7b0 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:29 +0200 Subject: [PATCH 08/22] accel: Replace target_ulong with vaddr in probe_*() Functions for probing memory accesses (and functions that call these) are updated to take a vaddr for guest virtual addresses over target_ulong. Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-9-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/stubs/tcg-stub.c | 4 ++-- accel/tcg/cputlb.c | 12 ++++++------ accel/tcg/user-exec.c | 8 ++++---- include/exec/exec-all.h | 14 +++++++------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index 0998e601ad..a9e7a2d5b4 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -26,14 +26,14 @@ void tcg_flush_jmp_cache(CPUState *cpu) { } -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, +int probe_access_flags(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t retaddr) { g_assert_not_reached(); } -void *probe_access(CPUArchState *env, target_ulong addr, int size, +void *probe_access(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { /* Handled by hardware accelerator. */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index bdf400f6e6..d873e58a5d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1499,7 +1499,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, } } -static int probe_access_internal(CPUArchState *env, target_ulong addr, +static int probe_access_internal(CPUArchState *env, vaddr addr, int fault_size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, CPUTLBEntryFull **pfull, @@ -1508,7 +1508,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); uint64_t tlb_addr = tlb_read_idx(entry, access_type); - target_ulong page_addr = addr & TARGET_PAGE_MASK; + vaddr page_addr = addr & TARGET_PAGE_MASK; int flags = TLB_FLAGS_MASK; if (!tlb_hit_page(tlb_addr, page_addr)) { @@ -1551,7 +1551,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, return flags; } -int probe_access_full(CPUArchState *env, target_ulong addr, int size, +int probe_access_full(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, CPUTLBEntryFull **pfull, uintptr_t retaddr) @@ -1568,7 +1568,7 @@ int probe_access_full(CPUArchState *env, target_ulong addr, int size, return flags; } -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, +int probe_access_flags(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t retaddr) { @@ -1589,7 +1589,7 @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size, return flags; } -void *probe_access(CPUArchState *env, target_ulong addr, int size, +void *probe_access(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { CPUTLBEntryFull *full; @@ -1648,7 +1648,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, * NOTE: This function will trigger an exception if the page is * not executable. */ -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, void **hostp) { CPUTLBEntryFull *full; diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index dc8d6b5d40..d71e26a7b5 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -721,7 +721,7 @@ int page_unprotect(target_ulong address, uintptr_t pc) return current_tb_invalidated ? 2 : 1; } -static int probe_access_internal(CPUArchState *env, target_ulong addr, +static int probe_access_internal(CPUArchState *env, vaddr addr, int fault_size, MMUAccessType access_type, bool nonfault, uintptr_t ra) { @@ -759,7 +759,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr, cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); } -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, +int probe_access_flags(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t ra) { @@ -771,7 +771,7 @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size, return flags; } -void *probe_access(CPUArchState *env, target_ulong addr, int size, +void *probe_access(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t ra) { int flags; @@ -783,7 +783,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, return size ? g2h(env_cpu(env), addr) : NULL; } -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, void **hostp) { int flags; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index f5508e242b..cc1c3556f6 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -413,16 +413,16 @@ static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, * Finally, return the host address for a page that is backed by RAM, * or NULL if the page requires I/O. */ -void *probe_access(CPUArchState *env, target_ulong addr, int size, +void *probe_access(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); -static inline void *probe_write(CPUArchState *env, target_ulong addr, int size, +static inline void *probe_write(CPUArchState *env, vaddr addr, int size, int mmu_idx, uintptr_t retaddr) { return probe_access(env, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); } -static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, +static inline void *probe_read(CPUArchState *env, vaddr addr, int size, int mmu_idx, uintptr_t retaddr) { return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); @@ -447,7 +447,7 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags. * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. */ -int probe_access_flags(CPUArchState *env, target_ulong addr, int size, +int probe_access_flags(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t retaddr); @@ -460,7 +460,7 @@ int probe_access_flags(CPUArchState *env, target_ulong addr, int size, * and must be consumed or copied immediately, before any further * access or changes to TLB @mmu_idx. */ -int probe_access_full(CPUArchState *env, target_ulong addr, int size, +int probe_access_full(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, CPUTLBEntryFull **pfull, uintptr_t retaddr); @@ -581,7 +581,7 @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu, * * Note: this function can trigger an exception. */ -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, +tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, void **hostp); /** @@ -596,7 +596,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, * Note: this function can trigger an exception. */ static inline tb_page_addr_t get_page_addr_code(CPUArchState *env, - target_ulong addr) + vaddr addr) { return get_page_addr_code_hostp(env, addr, NULL); } From b0326eb99917d99a37c8cd9d479a2e9b04659076 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:30 +0200 Subject: [PATCH 09/22] accel/tcg: Replace target_ulong with vaddr in *_mmu_lookup() Update atomic_mmu_lookup() and cpu_mmu_lookup() to take the guest virtual address as a vaddr instead of a target_ulong. Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-10-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 6 +++--- accel/tcg/user-exec.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index d873e58a5d..e02cfc550e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1898,15 +1898,15 @@ static bool mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, * Probe for an atomic operation. Do not allow unaligned operations, * or io operations to proceed. Return the host address. */ -static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - MemOpIdx oi, int size, uintptr_t retaddr) +static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, + int size, uintptr_t retaddr) { uintptr_t mmu_idx = get_mmuidx(oi); MemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); uintptr_t index; CPUTLBEntry *tlbe; - target_ulong tlb_addr; + vaddr tlb_addr; void *hostaddr; CPUTLBEntryFull *full; diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index d71e26a7b5..f8b16d6ab8 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -889,7 +889,7 @@ void page_reset_target_data(target_ulong start, target_ulong last) { } /* The softmmu versions of these helpers are in cputlb.c. */ -static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr, +static void *cpu_mmu_lookup(CPUArchState *env, vaddr addr, MemOp mop, uintptr_t ra, MMUAccessType type) { int a_bits = get_alignment_bits(mop); @@ -1324,8 +1324,8 @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, /* * Do not allow unaligned operations to proceed. Return the host address. */ -static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - MemOpIdx oi, int size, uintptr_t retaddr) +static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, + int size, uintptr_t retaddr) { MemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); From b1c09220b4cef17632495bfbb9a1355ce3eb301c Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:31 +0200 Subject: [PATCH 10/22] accel/tcg: Replace target_ulong with vaddr in translator_*() Use vaddr for guest virtual address in translator_use_goto_tb() and translator_loop(). Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-11-anjo@rev.ng> Signed-off-by: Richard Henderson --- accel/tcg/translator.c | 10 +++++----- include/exec/translator.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 918a455e73..0fd9efceba 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -117,7 +117,7 @@ static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags, } } -bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) +bool translator_use_goto_tb(DisasContextBase *db, vaddr dest) { /* Suppress goto_tb if requested. */ if (tb_cflags(db->tb) & CF_NO_GOTO_TB) { @@ -129,8 +129,8 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) } void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, - target_ulong pc, void *host_pc, - const TranslatorOps *ops, DisasContextBase *db) + vaddr pc, void *host_pc, const TranslatorOps *ops, + DisasContextBase *db) { uint32_t cflags = tb_cflags(tb); TCGOp *icount_start_insn; @@ -235,10 +235,10 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, } static void *translator_access(CPUArchState *env, DisasContextBase *db, - target_ulong pc, size_t len) + vaddr pc, size_t len) { void *host; - target_ulong base, end; + vaddr base, end; TranslationBlock *tb; tb = db->tb; diff --git a/include/exec/translator.h b/include/exec/translator.h index 224ae14aa7..a53d3243d4 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -142,8 +142,8 @@ typedef struct TranslatorOps { * - When too many instructions have been translated. */ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, - target_ulong pc, void *host_pc, - const TranslatorOps *ops, DisasContextBase *db); + vaddr pc, void *host_pc, const TranslatorOps *ops, + DisasContextBase *db); /** * translator_use_goto_tb @@ -153,7 +153,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, * Return true if goto_tb is allowed between the current TB * and the destination PC. */ -bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest); +bool translator_use_goto_tb(DisasContextBase *db, vaddr dest); /** * translator_io_start From c814c892e5e7f55eb5184e4aaf3bfa918070fbb1 Mon Sep 17 00:00:00 2001 From: Anton Johansson Date: Wed, 21 Jun 2023 15:56:33 +0200 Subject: [PATCH 11/22] cpu: Replace target_ulong with hwaddr in tb_invalidate_phys_addr() Signed-off-by: Anton Johansson Reviewed-by: Richard Henderson Message-Id: <20230621135633.1649-13-anjo@rev.ng> Signed-off-by: Richard Henderson --- cpu.c | 2 +- include/exec/exec-all.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpu.c b/cpu.c index 65ebaf8159..1c948d1161 100644 --- a/cpu.c +++ b/cpu.c @@ -293,7 +293,7 @@ void list_cpus(void) } #if defined(CONFIG_USER_ONLY) -void tb_invalidate_phys_addr(target_ulong addr) +void tb_invalidate_phys_addr(hwaddr addr) { mmap_lock(); tb_invalidate_phys_page(addr); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index cc1c3556f6..200c27eadf 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -526,7 +526,7 @@ uint32_t curr_cflags(CPUState *cpu); /* TranslationBlock invalidate API */ #if defined(CONFIG_USER_ONLY) -void tb_invalidate_phys_addr(target_ulong addr); +void tb_invalidate_phys_addr(hwaddr addr); #else void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs); #endif From 1d3daf95254d998b91445c48de875796df3b0998 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 23 May 2023 14:11:07 +0100 Subject: [PATCH 12/22] softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Balton discovered that asserts for the extract/deposit calls had a significant impact on a lame benchmark on qemu-ppc. Replicating with: ./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame \ -h pts-trondheim-3.wav pts-trondheim-3.mp3 showed up the pack/unpack routines not eliding the assert checks as it should have done causing them to prominently figure in the profile:  11.44%  qemu-ppc64  qemu-ppc64               [.] unpack_raw64.isra.0  11.03%  qemu-ppc64  qemu-ppc64               [.] parts64_uncanon_normal   8.26%  qemu-ppc64  qemu-ppc64               [.] helper_compute_fprf_float64   6.75%  qemu-ppc64  qemu-ppc64               [.] do_float_check_status   5.34%  qemu-ppc64  qemu-ppc64               [.] parts64_muladd   4.75%  qemu-ppc64  qemu-ppc64               [.] pack_raw64.isra.0   4.38%  qemu-ppc64  qemu-ppc64               [.] parts64_canonicalize   3.62%  qemu-ppc64  qemu-ppc64               [.] float64r32_round_pack_canonical After this patch the same test runs 31 seconds faster with a profile where the generated code dominates more: + 14.12% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000619420 + 13.30% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000616850 + 12.58% 12.19% qemu-ppc64 qemu-ppc64 [.] parts64_uncanon_normal + 10.62% 0.00% qemu-ppc64 [unknown] [.] 0x000000400061bf70 + 9.91% 9.73% qemu-ppc64 qemu-ppc64 [.] helper_compute_fprf_float64 + 7.84% 7.82% qemu-ppc64 qemu-ppc64 [.] do_float_check_status + 6.47% 5.78% qemu-ppc64 qemu-ppc64 [.] parts64_canonicalize.constprop.0 + 6.46% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000620130 + 6.42% 0.00% qemu-ppc64 [unknown] [.] 0x0000004000619400 + 6.17% 6.04% qemu-ppc64 qemu-ppc64 [.] parts64_muladd + 5.85% 0.00% qemu-ppc64 [unknown] [.] 0x00000040006167e0 + 5.74% 0.00% qemu-ppc64 [unknown] [.] 0x0000b693fcffffd3 + 5.45% 4.78% qemu-ppc64 qemu-ppc64 [.] float64r32_round_pack_canonical Suggested-by: Richard Henderson Message-Id: [AJB: Patchified rth's suggestion] Signed-off-by: Alex Bennée Cc: BALATON Zoltan Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Tested-by: BALATON Zoltan Message-Id: <20230523131107.3680641-1-alex.bennee@linaro.org> Signed-off-by: Richard Henderson --- fpu/softfloat.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 108f9cb224..42e6c188b4 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -593,27 +593,27 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw) }; } -static inline void float16_unpack_raw(FloatParts64 *p, float16 f) +static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f) { unpack_raw64(p, &float16_params, f); } -static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) +static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) { unpack_raw64(p, &bfloat16_params, f); } -static inline void float32_unpack_raw(FloatParts64 *p, float32 f) +static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f) { unpack_raw64(p, &float32_params, f); } -static inline void float64_unpack_raw(FloatParts64 *p, float64 f) +static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f) { unpack_raw64(p, &float64_params, f); } -static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f) +static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f) { *p = (FloatParts128) { .cls = float_class_unclassified, @@ -623,7 +623,7 @@ static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f) }; } -static void float128_unpack_raw(FloatParts128 *p, float128 f) +static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f) { const int f_size = float128_params.frac_size - 64; const int e_size = float128_params.exp_size; @@ -650,27 +650,27 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt) return ret; } -static inline float16 float16_pack_raw(const FloatParts64 *p) +static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p) { return make_float16(pack_raw64(p, &float16_params)); } -static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p) +static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p) { return pack_raw64(p, &bfloat16_params); } -static inline float32 float32_pack_raw(const FloatParts64 *p) +static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p) { return make_float32(pack_raw64(p, &float32_params)); } -static inline float64 float64_pack_raw(const FloatParts64 *p) +static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p) { return make_float64(pack_raw64(p, &float64_params)); } -static float128 float128_pack_raw(const FloatParts128 *p) +static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p) { const int f_size = float128_params.frac_size - 64; const int e_size = float128_params.exp_size; From ea185a557bc97868f3729060ea1cd003dbd971d1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 10 Jun 2023 10:19:59 -0700 Subject: [PATCH 13/22] tests/plugin: Remove duplicate insn log from libinsn.so This is a perfectly natural occurrence for x86 "rep movb", where the "rep" prefix forms a counted loop of the one insn. During the tests/tcg/multiarch/memory test, this logging is triggered over 350000 times. Within the context of cross-i386-tci build, which is already slow by nature, the logging is sufficient to push the test into timeout. Signed-off-by: Richard Henderson --- tests/plugin/insn.c | 9 +-------- tests/tcg/i386/Makefile.softmmu-target | 9 --------- tests/tcg/i386/Makefile.target | 6 ------ tests/tcg/x86_64/Makefile.softmmu-target | 9 --------- 4 files changed, 1 insertion(+), 32 deletions(-) diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c index cd5ea5d4ae..9bd6e44f73 100644 --- a/tests/plugin/insn.c +++ b/tests/plugin/insn.c @@ -19,7 +19,6 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; #define MAX_CPUS 8 /* lets not go nuts */ typedef struct { - uint64_t last_pc; uint64_t insn_count; } InstructionCount; @@ -51,13 +50,7 @@ static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) { unsigned int i = cpu_index % MAX_CPUS; InstructionCount *c = &counts[i]; - uint64_t this_pc = GPOINTER_TO_UINT(udata); - if (this_pc == c->last_pc) { - g_autofree gchar *out = g_strdup_printf("detected repeat execution @ 0x%" - PRIx64 "\n", this_pc); - qemu_plugin_outs(out); - } - c->last_pc = this_pc; + c->insn_count++; } diff --git a/tests/tcg/i386/Makefile.softmmu-target b/tests/tcg/i386/Makefile.softmmu-target index ed922d59c8..5266f2335a 100644 --- a/tests/tcg/i386/Makefile.softmmu-target +++ b/tests/tcg/i386/Makefile.softmmu-target @@ -33,14 +33,5 @@ EXTRA_RUNS+=$(MULTIARCH_RUNS) memory: CFLAGS+=-DCHECK_UNALIGNED=1 -# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so -run-plugin-%-with-libinsn.so: - $(call run-test, $@, \ - $(QEMU) -monitor none -display none \ - -chardev file$(COMMA)path=$@.out$(COMMA)id=output \ - -plugin ../../plugin/libinsn.so$(COMMA)inline=on \ - -d plugin -D $*-with-libinsn.so.pout \ - $(QEMU_OPTS) $*) - # Running QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target index 821822ed0c..f2ee7a4db7 100644 --- a/tests/tcg/i386/Makefile.target +++ b/tests/tcg/i386/Makefile.target @@ -63,12 +63,6 @@ else SKIP_I386_TESTS+=test-i386-fprem endif -# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so -run-plugin-%-with-libinsn.so: - $(call run-test, $@, $(QEMU) $(QEMU_OPTS) \ - -plugin ../../plugin/libinsn.so$(COMMA)inline=on \ - -d plugin -D $*-with-libinsn.so.pout $*) - # Update TESTS I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) TESTS=$(MULTIARCH_TESTS) $(I386_TESTS) diff --git a/tests/tcg/x86_64/Makefile.softmmu-target b/tests/tcg/x86_64/Makefile.softmmu-target index 7207fee94c..1bd763f2e6 100644 --- a/tests/tcg/x86_64/Makefile.softmmu-target +++ b/tests/tcg/x86_64/Makefile.softmmu-target @@ -33,14 +33,5 @@ EXTRA_RUNS+=$(MULTIARCH_RUNS) memory: CFLAGS+=-DCHECK_UNALIGNED=1 -# non-inline runs will trigger the duplicate instruction heuristics in libinsn.so -run-plugin-%-with-libinsn.so: - $(call run-test, $@, \ - $(QEMU) -monitor none -display none \ - -chardev file$(COMMA)path=$@.out$(COMMA)id=output \ - -plugin ../../plugin/libinsn.so$(COMMA)inline=on \ - -d plugin -D $*-with-libinsn.so.pout \ - $(QEMU_OPTS) $*) - # Running QEMU_OPTS+=-device isa-debugcon,chardev=output -device isa-debug-exit,iobase=0xf4,iosize=0x4 -kernel From 1b65b4f54c7f7d07b5d35fac5f7de3e75f5f21a0 Mon Sep 17 00:00:00 2001 From: Fei Wu Date: Wed, 7 Jun 2023 20:24:02 +0800 Subject: [PATCH 14/22] accel/tcg: remove CONFIG_PROFILER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TBStats will be introduced to replace CONFIG_PROFILER totally, here remove all CONFIG_PROFILER related stuffs first. Signed-off-by: Vanderson M. do Rosario Signed-off-by: Alex Bennée Signed-off-by: Fei Wu Reviewed-by: Richard Henderson Message-Id: <20230607122411.3394702-2-fei2.wu@intel.com> Signed-off-by: Richard Henderson --- accel/tcg/monitor.c | 31 ----- accel/tcg/tcg-accel-ops.c | 10 -- accel/tcg/translate-all.c | 33 ------ hmp-commands-info.hx | 15 --- include/qemu/timer.h | 9 -- include/tcg/tcg.h | 26 ----- meson.build | 2 - meson_options.txt | 2 - qapi/machine.json | 18 --- scripts/meson-buildoptions.sh | 3 - softmmu/runstate.c | 9 -- tcg/tcg.c | 214 ---------------------------------- tests/qtest/qmp-cmd-test.c | 3 - 13 files changed, 375 deletions(-) diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c index f171bc6f5e..d48de23999 100644 --- a/accel/tcg/monitor.c +++ b/accel/tcg/monitor.c @@ -81,37 +81,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp) return human_readable_text_from_str(buf); } -#ifdef CONFIG_PROFILER - -int64_t dev_time; - -HumanReadableText *qmp_x_query_profile(Error **errp) -{ - g_autoptr(GString) buf = g_string_new(""); - static int64_t last_cpu_exec_time; - int64_t cpu_exec_time; - int64_t delta; - - cpu_exec_time = tcg_cpu_exec_time(); - delta = cpu_exec_time - last_cpu_exec_time; - - g_string_append_printf(buf, "async time %" PRId64 " (%0.3f)\n", - dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); - g_string_append_printf(buf, "qemu time %" PRId64 " (%0.3f)\n", - delta, delta / (double)NANOSECONDS_PER_SECOND); - last_cpu_exec_time = cpu_exec_time; - dev_time = 0; - - return human_readable_text_from_str(buf); -} -#else -HumanReadableText *qmp_x_query_profile(Error **errp) -{ - error_setg(errp, "Internal profiler not compiled"); - return NULL; -} -#endif - static void hmp_tcg_register(void) { monitor_register_hmp_info_hrt("jit", qmp_x_query_jit); diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 58c8e64096..3973591508 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -70,20 +70,10 @@ void tcg_cpus_destroy(CPUState *cpu) int tcg_cpus_exec(CPUState *cpu) { int ret; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif assert(tcg_enabled()); -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); -#ifdef CONFIG_PROFILER - qatomic_set(&tcg_ctx->prof.cpu_exec_time, - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); -#endif return ret; } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 03c49baf1c..d3d4fbc1a4 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -202,10 +202,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t host_pc) { uint64_t data[TARGET_INSN_START_WORDS]; -#ifdef CONFIG_PROFILER - TCGProfile *prof = &tcg_ctx->prof; - int64_t ti = profile_getclock(); -#endif int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); if (insns_left < 0) { @@ -222,12 +218,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, } cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); - -#ifdef CONFIG_PROFILER - qatomic_set(&prof->restore_time, - prof->restore_time + profile_getclock() - ti); - qatomic_set(&prof->restore_count, prof->restore_count + 1); -#endif } bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) @@ -290,13 +280,6 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, tcg_ctx->cpu = NULL; *max_insns = tb->icount; -#ifdef CONFIG_PROFILER - qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1); - qatomic_set(&tcg_ctx->prof.interm_time, - tcg_ctx->prof.interm_time + profile_getclock() - *ti); - *ti = profile_getclock(); -#endif - return tcg_gen_code(tcg_ctx, tb, pc); } @@ -310,9 +293,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb_page_addr_t phys_pc; tcg_insn_unit *gen_code_buf; int gen_code_size, search_size, max_insns; -#ifdef CONFIG_PROFILER - TCGProfile *prof = &tcg_ctx->prof; -#endif int64_t ti; void *host_pc; @@ -371,12 +351,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb_overflow: -#ifdef CONFIG_PROFILER - /* includes aborted translations because of exceptions */ - qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); - ti = profile_getclock(); -#endif - trace_translate_block(tb, pc, tb->tc.ptr); gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); @@ -431,13 +405,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, */ perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); -#ifdef CONFIG_PROFILER - qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); - qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); - qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); - qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); -#endif - if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && qemu_log_in_addr_range(pc)) { FILE *logfile = qemu_log_trylock(); diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 47d63d26db..f5b37eb74a 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -360,21 +360,6 @@ SRST Show host USB devices. ERST -#if defined(CONFIG_TCG) - { - .name = "profile", - .args_type = "", - .params = "", - .help = "show profiling information", - .cmd_info_hrt = qmp_x_query_profile, - }, -#endif - -SRST - ``info profile`` - Show profiling information. -ERST - { .name = "capture", .args_type = "", diff --git a/include/qemu/timer.h b/include/qemu/timer.h index ee071e07d1..9a91cb1248 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -989,13 +989,4 @@ static inline int64_t cpu_get_host_ticks(void) } #endif -#ifdef CONFIG_PROFILER -static inline int64_t profile_getclock(void) -{ - return get_clock(); -} - -extern int64_t dev_time; -#endif - #endif diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index a498f31967..95541e9474 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -478,27 +478,6 @@ static inline TCGRegSet output_pref(const TCGOp *op, unsigned i) return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0; } -typedef struct TCGProfile { - int64_t cpu_exec_time; - int64_t tb_count1; - int64_t tb_count; - int64_t op_count; /* total insn count */ - int op_count_max; /* max insn per TB */ - int temp_count_max; - int64_t temp_count; - int64_t del_op_count; - int64_t code_in_len; - int64_t code_out_len; - int64_t search_out_len; - int64_t interm_time; - int64_t code_time; - int64_t la_time; - int64_t opt_time; - int64_t restore_count; - int64_t restore_time; - int64_t table_op_count[NB_OPS]; -} TCGProfile; - struct TCGContext { uint8_t *pool_cur, *pool_end; TCGPool *pool_first, *pool_current, *pool_first_large; @@ -528,10 +507,6 @@ struct TCGContext { tcg_insn_unit *code_buf; /* pointer for start of tb */ tcg_insn_unit *code_ptr; /* pointer for running end of tb */ -#ifdef CONFIG_PROFILER - TCGProfile prof; -#endif - #ifdef CONFIG_DEBUG_TCG int goto_tb_issue_mask; const TCGOpcode *vecop_list; @@ -871,7 +846,6 @@ static inline TCGv_ptr tcg_temp_new_ptr(void) return temp_tcgv_ptr(t); } -int64_t tcg_cpu_exec_time(void); void tcg_dump_info(GString *buf); void tcg_dump_op_count(GString *buf); diff --git a/meson.build b/meson.build index 3e3d38badb..b409788832 100644 --- a/meson.build +++ b/meson.build @@ -2121,7 +2121,6 @@ if numa.found() dependencies: numa)) endif config_host_data.set('CONFIG_OPENGL', opengl.found()) -config_host_data.set('CONFIG_PROFILER', get_option('profiler')) config_host_data.set('CONFIG_RBD', rbd.found()) config_host_data.set('CONFIG_RDMA', rdma.found()) config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack')) @@ -4087,7 +4086,6 @@ if 'objc' in all_languages summary_info += {'QEMU_OBJCFLAGS': ' '.join(qemu_common_flags)} endif summary_info += {'QEMU_LDFLAGS': ' '.join(qemu_ldflags)} -summary_info += {'profiler': get_option('profiler')} summary_info += {'link-time optimization (LTO)': get_option('b_lto')} summary_info += {'PIE': get_option('b_pie')} summary_info += {'static build': get_option('prefer_static')} diff --git a/meson_options.txt b/meson_options.txt index 90237389e2..bbb5c7e886 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -345,8 +345,6 @@ option('qom_cast_debug', type: 'boolean', value: true, option('gprof', type: 'boolean', value: false, description: 'QEMU profiling with gprof', deprecated: true) -option('profiler', type: 'boolean', value: false, - description: 'profiler support') option('slirp_smbd', type : 'feature', value : 'auto', description: 'use smbd (at path --smbd=*) in slirp networking') diff --git a/qapi/machine.json b/qapi/machine.json index 37660d8f2a..a08b6576ca 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1575,24 +1575,6 @@ 'if': 'CONFIG_TCG', 'features': [ 'unstable' ] } -## -# @x-query-profile: -# -# Query TCG profiling information -# -# Features: -# -# @unstable: This command is meant for debugging. -# -# Returns: profile information -# -# Since: 6.2 -## -{ 'command': 'x-query-profile', - 'returns': 'HumanReadableText', - 'if': 'CONFIG_TCG', - 'features': [ 'unstable' ] } - ## # @x-query-ramblock: # diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 5714fd93d9..7dd5709ef4 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -39,7 +39,6 @@ meson_options_help() { printf "%s\n" ' jemalloc/system/tcmalloc)' printf "%s\n" ' --enable-module-upgrades try to load modules from alternate paths for' printf "%s\n" ' upgrades' - printf "%s\n" ' --enable-profiler profiler support' printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and' printf "%s\n" ' getrandom()' printf "%s\n" ' --enable-safe-stack SafeStack Stack Smash Protection (requires' @@ -401,8 +400,6 @@ _meson_option_parse() { --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;; --enable-png) printf "%s" -Dpng=enabled ;; --disable-png) printf "%s" -Dpng=disabled ;; - --enable-profiler) printf "%s" -Dprofiler=true ;; - --disable-profiler) printf "%s" -Dprofiler=false ;; --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;; --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;; --enable-qcow1) printf "%s" -Dqcow1=enabled ;; diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 1957caf73f..a9fbcf4862 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -727,18 +727,9 @@ static bool main_loop_should_exit(int *status) int qemu_main_loop(void) { int status = EXIT_SUCCESS; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif while (!main_loop_should_exit(&status)) { -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif main_loop_wait(false); -#ifdef CONFIG_PROFILER - dev_time += profile_getclock() - ti; -#endif } return status; diff --git a/tcg/tcg.c b/tcg/tcg.c index 3fcd0d9f32..a0628fe424 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3033,10 +3033,6 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) QTAILQ_REMOVE(&s->ops, op, link); QTAILQ_INSERT_TAIL(&s->free_ops, op, link); s->nb_ops--; - -#ifdef CONFIG_PROFILER - qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); -#endif } void tcg_remove_ops_after(TCGOp *op) @@ -5906,143 +5902,16 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); } -#ifdef CONFIG_PROFILER - -/* avoid copy/paste errors */ -#define PROF_ADD(to, from, field) \ - do { \ - (to)->field += qatomic_read(&((from)->field)); \ - } while (0) - -#define PROF_MAX(to, from, field) \ - do { \ - typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ - if (val__ > (to)->field) { \ - (to)->field = val__; \ - } \ - } while (0) - -/* Pass in a zero'ed @prof */ -static inline -void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) -{ - unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); - unsigned int i; - - for (i = 0; i < n_ctxs; i++) { - TCGContext *s = qatomic_read(&tcg_ctxs[i]); - const TCGProfile *orig = &s->prof; - - if (counters) { - PROF_ADD(prof, orig, cpu_exec_time); - PROF_ADD(prof, orig, tb_count1); - PROF_ADD(prof, orig, tb_count); - PROF_ADD(prof, orig, op_count); - PROF_MAX(prof, orig, op_count_max); - PROF_ADD(prof, orig, temp_count); - PROF_MAX(prof, orig, temp_count_max); - PROF_ADD(prof, orig, del_op_count); - PROF_ADD(prof, orig, code_in_len); - PROF_ADD(prof, orig, code_out_len); - PROF_ADD(prof, orig, search_out_len); - PROF_ADD(prof, orig, interm_time); - PROF_ADD(prof, orig, code_time); - PROF_ADD(prof, orig, la_time); - PROF_ADD(prof, orig, opt_time); - PROF_ADD(prof, orig, restore_count); - PROF_ADD(prof, orig, restore_time); - } - if (table) { - int i; - - for (i = 0; i < NB_OPS; i++) { - PROF_ADD(prof, orig, table_op_count[i]); - } - } - } -} - -#undef PROF_ADD -#undef PROF_MAX - -static void tcg_profile_snapshot_counters(TCGProfile *prof) -{ - tcg_profile_snapshot(prof, true, false); -} - -static void tcg_profile_snapshot_table(TCGProfile *prof) -{ - tcg_profile_snapshot(prof, false, true); -} - -void tcg_dump_op_count(GString *buf) -{ - TCGProfile prof = {}; - int i; - - tcg_profile_snapshot_table(&prof); - for (i = 0; i < NB_OPS; i++) { - g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, - prof.table_op_count[i]); - } -} - -int64_t tcg_cpu_exec_time(void) -{ - unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); - unsigned int i; - int64_t ret = 0; - - for (i = 0; i < n_ctxs; i++) { - const TCGContext *s = qatomic_read(&tcg_ctxs[i]); - const TCGProfile *prof = &s->prof; - - ret += qatomic_read(&prof->cpu_exec_time); - } - return ret; -} -#else void tcg_dump_op_count(GString *buf) { g_string_append_printf(buf, "[TCG profiler not compiled]\n"); } -int64_t tcg_cpu_exec_time(void) -{ - error_report("%s: TCG profiler not compiled", __func__); - exit(EXIT_FAILURE); -} -#endif - - int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { -#ifdef CONFIG_PROFILER - TCGProfile *prof = &s->prof; -#endif int i, start_words, num_insns; TCGOp *op; -#ifdef CONFIG_PROFILER - { - int n = 0; - - QTAILQ_FOREACH(op, &s->ops, link) { - n++; - } - qatomic_set(&prof->op_count, prof->op_count + n); - if (n > prof->op_count_max) { - qatomic_set(&prof->op_count_max, n); - } - - n = s->nb_temps; - qatomic_set(&prof->temp_count, prof->temp_count + n); - if (n > prof->temp_count_max) { - qatomic_set(&prof->temp_count_max, n); - } - } -#endif - if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) && qemu_log_in_addr_range(pc_start))) { FILE *logfile = qemu_log_trylock(); @@ -6071,17 +5940,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) } #endif -#ifdef CONFIG_PROFILER - qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); -#endif - tcg_optimize(s); -#ifdef CONFIG_PROFILER - qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); - qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); -#endif - reachable_code_pass(s); liveness_pass_0(s); liveness_pass_1(s); @@ -6105,10 +5965,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) } } -#ifdef CONFIG_PROFILER - qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); -#endif - if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) && qemu_log_in_addr_range(pc_start))) { FILE *logfile = qemu_log_trylock(); @@ -6151,10 +6007,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) QTAILQ_FOREACH(op, &s->ops, link) { TCGOpcode opc = op->opc; -#ifdef CONFIG_PROFILER - qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); -#endif - switch (opc) { case INDEX_op_mov_i32: case INDEX_op_mov_i64: @@ -6249,76 +6101,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) return tcg_current_code_size(s); } -#ifdef CONFIG_PROFILER -void tcg_dump_info(GString *buf) -{ - TCGProfile prof = {}; - const TCGProfile *s; - int64_t tb_count; - int64_t tb_div_count; - int64_t tot; - - tcg_profile_snapshot_counters(&prof); - s = &prof; - tb_count = s->tb_count; - tb_div_count = tb_count ? tb_count : 1; - tot = s->interm_time + s->code_time; - - g_string_append_printf(buf, "JIT cycles %" PRId64 - " (%0.3f s at 2.4 GHz)\n", - tot, tot / 2.4e9); - g_string_append_printf(buf, "translated TBs %" PRId64 - " (aborted=%" PRId64 " %0.1f%%)\n", - tb_count, s->tb_count1 - tb_count, - (double)(s->tb_count1 - s->tb_count) - / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); - g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", - (double)s->op_count / tb_div_count, s->op_count_max); - g_string_append_printf(buf, "deleted ops/TB %0.2f\n", - (double)s->del_op_count / tb_div_count); - g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", - (double)s->temp_count / tb_div_count, - s->temp_count_max); - g_string_append_printf(buf, "avg host code/TB %0.1f\n", - (double)s->code_out_len / tb_div_count); - g_string_append_printf(buf, "avg search data/TB %0.1f\n", - (double)s->search_out_len / tb_div_count); - - g_string_append_printf(buf, "cycles/op %0.1f\n", - s->op_count ? (double)tot / s->op_count : 0); - g_string_append_printf(buf, "cycles/in byte %0.1f\n", - s->code_in_len ? (double)tot / s->code_in_len : 0); - g_string_append_printf(buf, "cycles/out byte %0.1f\n", - s->code_out_len ? (double)tot / s->code_out_len : 0); - g_string_append_printf(buf, "cycles/search byte %0.1f\n", - s->search_out_len ? - (double)tot / s->search_out_len : 0); - if (tot == 0) { - tot = 1; - } - g_string_append_printf(buf, " gen_interm time %0.1f%%\n", - (double)s->interm_time / tot * 100.0); - g_string_append_printf(buf, " gen_code time %0.1f%%\n", - (double)s->code_time / tot * 100.0); - g_string_append_printf(buf, "optim./code time %0.1f%%\n", - (double)s->opt_time / (s->code_time ? - s->code_time : 1) - * 100.0); - g_string_append_printf(buf, "liveness/code time %0.1f%%\n", - (double)s->la_time / (s->code_time ? - s->code_time : 1) * 100.0); - g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", - s->restore_count); - g_string_append_printf(buf, " avg cycles %0.1f\n", - s->restore_count ? - (double)s->restore_time / s->restore_count : 0); -} -#else void tcg_dump_info(GString *buf) { g_string_append_printf(buf, "[TCG profiler not compiled]\n"); } -#endif #ifdef ELF_HOST_MACHINE /* In order to use this feature, the backend needs to do three things: diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index a58de48d2a..73a670e8fa 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -46,9 +46,6 @@ static int query_error_class(const char *cmd) { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE }, { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR }, { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR }, -#ifndef CONFIG_PROFILER - { "x-query-profile", ERROR_CLASS_GENERIC_ERROR }, -#endif /* Only valid with a USB bus added */ { "x-query-usb", ERROR_CLASS_GENERIC_ERROR }, /* Only valid with accel=tcg */ From 70bfde9a7de7fa3495d9e2bd7142cb1bc656484e Mon Sep 17 00:00:00 2001 From: Max Chou Date: Fri, 23 Jun 2023 00:16:24 +0800 Subject: [PATCH 15/22] tcg: Fix temporary variable in tcg_gen_gvec_andcs The 5th parameter of tcg_gen_gvec_2s should be replaced by the temporary tmp variable in the tcg_gen_gvec_andcs function. Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-Id: <20230622161646.32005-9-max.chou@sifive.com> Signed-off-by: Richard Henderson --- tcg/tcg-op-gvec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 95a588d6d2..a062239804 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -2774,7 +2774,7 @@ void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i64 tmp = tcg_temp_ebb_new_i64(); tcg_gen_dup_i64(vece, tmp, c); - tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g); + tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &g); tcg_temp_free_i64(tmp); } From f6ff4923b92ceefbe5650c3e90ccdcc57dc60fb7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 19 Jun 2023 11:08:41 +0200 Subject: [PATCH 16/22] target/microblaze: Define TCG_GUEST_DEFAULT_MO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The microblaze architecture does not reorder instructions. While there is an MBAR wait-for-data-access instruction, this concerns synchronizing with DMA. This should have been defined when enabling MTTCG. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Edgar E. Iglesias Fixes: d449561b130 ("configure: microblaze: Enable mttcg") Signed-off-by: Richard Henderson --- target/microblaze/cpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index 3525de144c..a7b040abd4 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -24,6 +24,9 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" +/* MicroBlaze is always in-order. */ +#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL + typedef struct CPUArchState CPUMBState; #if !defined(CONFIG_USER_ONLY) #include "mmu.h" From c914d46d0a645e7c633292146f4e38c945d4f847 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 18 Feb 2021 16:47:35 +0000 Subject: [PATCH 17/22] tcg: Do not elide memory barriers for !CF_PARALLEL in system mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The virtio devices require proper memory ordering between the vcpus and the iothreads. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index c07de5d9f8..7aadb37756 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -102,7 +102,19 @@ void tcg_gen_br(TCGLabel *l) void tcg_gen_mb(TCGBar mb_type) { - if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { +#ifdef CONFIG_USER_ONLY + bool parallel = tcg_ctx->gen_tb->cflags & CF_PARALLEL; +#else + /* + * It is tempting to elide the barrier in a uniprocessor context. + * However, even with a single cpu we have i/o threads running in + * parallel, and lack of memory order can result in e.g. virtio + * queue entries being read incorrectly. + */ + bool parallel = true; +#endif + + if (parallel) { tcg_gen_op1(INDEX_op_mb, mb_type); } } From f86e8f3d138de112d66ec28792e0fd303bf129ca Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 3 Mar 2022 15:57:10 +0000 Subject: [PATCH 18/22] tcg: Add host memory barriers to cpu_ldst.h interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bring the helpers into line with the rest of tcg in respecting guest memory ordering. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 10 ++++++++++ accel/tcg/internal.h | 34 ++++++++++++++++++++++++++++++++++ accel/tcg/user-exec.c | 10 ++++++++++ 3 files changed, 54 insertions(+) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e02cfc550e..5666a8e23a 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2339,6 +2339,7 @@ static uint8_t do_ld1_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, MMULookupLocals l; bool crosspage; + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); tcg_debug_assert(!crosspage); @@ -2360,6 +2361,7 @@ static uint16_t do_ld2_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uint16_t ret; uint8_t a, b; + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); if (likely(!crosspage)) { return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); @@ -2390,6 +2392,7 @@ static uint32_t do_ld4_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, bool crosspage; uint32_t ret; + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); if (likely(!crosspage)) { return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); @@ -2417,6 +2420,7 @@ static uint64_t do_ld8_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, bool crosspage; uint64_t ret; + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l); if (likely(!crosspage)) { return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra); @@ -2469,6 +2473,7 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr, Int128 ret; int first; + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l); if (likely(!crosspage)) { /* Perform the load host endian. */ @@ -2802,6 +2807,7 @@ void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val, bool crosspage; tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8); + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); tcg_debug_assert(!crosspage); @@ -2815,6 +2821,7 @@ static void do_st2_mmu(CPUArchState *env, vaddr addr, uint16_t val, bool crosspage; uint8_t a, b; + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra); @@ -2843,6 +2850,7 @@ static void do_st4_mmu(CPUArchState *env, vaddr addr, uint32_t val, MMULookupLocals l; bool crosspage; + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra); @@ -2870,6 +2878,7 @@ static void do_st8_mmu(CPUArchState *env, vaddr addr, uint64_t val, MMULookupLocals l; bool crosspage; + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra); @@ -2899,6 +2908,7 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, uint64_t a, b; int first; + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { /* Swap to host endian if necessary, then store. */ diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 91f308bdfa..650c3ac53f 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -78,4 +78,38 @@ extern int64_t max_advance; extern bool one_insn_per_tb; +/** + * tcg_req_mo: + * @type: TCGBar + * + * Filter @type to the barrier that is required for the guest + * memory ordering vs the host memory ordering. A non-zero + * result indicates that some barrier is required. + * + * If TCG_GUEST_DEFAULT_MO is not defined, assume that the + * guest requires strict ordering. + * + * This is a macro so that it's constant even without optimization. + */ +#ifdef TCG_GUEST_DEFAULT_MO +# define tcg_req_mo(type) \ + ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) +#else +# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO) +#endif + +/** + * cpu_req_mo: + * @type: TCGBar + * + * If tcg_req_mo indicates a barrier for @type is required + * for the guest memory model, issue a host memory barrier. + */ +#define cpu_req_mo(type) \ + do { \ + if (tcg_req_mo(type)) { \ + smp_mb(); \ + } \ + } while (0) + #endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index f8b16d6ab8..8fbcbf9771 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -914,6 +914,7 @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr, uint8_t ret; tcg_debug_assert((mop & MO_SIZE) == MO_8); + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); ret = ldub_p(haddr); clear_helper_retaddr(); @@ -947,6 +948,7 @@ static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr, uint16_t ret; tcg_debug_assert((mop & MO_SIZE) == MO_16); + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); ret = load_atom_2(env, ra, haddr, mop); clear_helper_retaddr(); @@ -984,6 +986,7 @@ static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr, uint32_t ret; tcg_debug_assert((mop & MO_SIZE) == MO_32); + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); ret = load_atom_4(env, ra, haddr, mop); clear_helper_retaddr(); @@ -1021,6 +1024,7 @@ static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr, uint64_t ret; tcg_debug_assert((mop & MO_SIZE) == MO_64); + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); ret = load_atom_8(env, ra, haddr, mop); clear_helper_retaddr(); @@ -1052,6 +1056,7 @@ static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr, Int128 ret; tcg_debug_assert((mop & MO_SIZE) == MO_128); + cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD); ret = load_atom_16(env, ra, haddr, mop); clear_helper_retaddr(); @@ -1087,6 +1092,7 @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, void *haddr; tcg_debug_assert((mop & MO_SIZE) == MO_8); + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); stb_p(haddr, val); clear_helper_retaddr(); @@ -1111,6 +1117,7 @@ static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, void *haddr; tcg_debug_assert((mop & MO_SIZE) == MO_16); + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1139,6 +1146,7 @@ static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, void *haddr; tcg_debug_assert((mop & MO_SIZE) == MO_32); + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1167,6 +1175,7 @@ static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, void *haddr; tcg_debug_assert((mop & MO_SIZE) == MO_64); + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { @@ -1195,6 +1204,7 @@ static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val, void *haddr; tcg_debug_assert((mop & MO_SIZE) == MO_128); + cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE); if (mop & MO_BSWAP) { From 97e1576957c28a5fbcc810f92643e52069cc49b7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 3 Mar 2022 15:57:27 +0000 Subject: [PATCH 19/22] accel/tcg: Remove check_tcg_memory_orders_compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now issue host memory barriers to match the guest memory order. Continue to disable MTTCG only if the guest has not been ported. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/tcg-all.c | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 02af6a2891..03dfd67e9e 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -64,37 +64,23 @@ DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE, * they can set the appropriate CONFIG flags in ${target}-softmmu.mak * * Once a guest architecture has been converted to the new primitives - * there are two remaining limitations to check. - * - * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) - * - The host must have a stronger memory order than the guest - * - * It may be possible in future to support strong guests on weak hosts - * but that will require tagging all load/stores in a guest with their - * implicit memory order requirements which would likely slow things - * down a lot. + * there is one remaining limitation to check: + * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) */ -static bool check_tcg_memory_orders_compatible(void) -{ -#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO) - return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0; -#else - return false; -#endif -} - static bool default_mttcg_enabled(void) { if (icount_enabled() || TCG_OVERSIZED_GUEST) { return false; - } else { -#ifdef TARGET_SUPPORTS_MTTCG - return check_tcg_memory_orders_compatible(); -#else - return false; -#endif } +#ifdef TARGET_SUPPORTS_MTTCG +# ifndef TCG_GUEST_DEFAULT_MO +# error "TARGET_SUPPORTS_MTTCG without TCG_GUEST_DEFAULT_MO" +# endif + return true; +#else + return false; +#endif } static void tcg_accel_instance_init(Object *obj) @@ -162,11 +148,6 @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp) warn_report("Guest not yet converted to MTTCG - " "you may get unexpected results"); #endif - if (!check_tcg_memory_orders_compatible()) { - warn_report("Guest expects a stronger memory ordering " - "than the host provides"); - error_printf("This may cause strange/hard to debug errors\n"); - } s->mttcg_enabled = true; } } else if (strcmp(value, "single") == 0) { From 58e8f1f616d117aed6283690419dc16f53b7a202 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 22 Feb 2023 19:17:52 -1000 Subject: [PATCH 20/22] accel/tcg: Store some tlb flags in CPUTLBEntryFull MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have run out of bits we can use within the CPUTLBEntry comparators, as TLB_FLAGS_MASK cannot overlap alignment. Store slow_flags[] in CPUTLBEntryFull, and merge with the flags from the comparator. A new TLB_FORCE_SLOW bit is set within the comparator as an indication that the slow path must be used. Move TLB_BSWAP to TLB_SLOW_FLAGS_MASK. Since we are out of bits, we cannot create a new bit without moving an old one. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 98 ++++++++++++++++++++++++----------------- include/exec/cpu-all.h | 21 +++++++-- include/exec/cpu-defs.h | 6 +++ include/hw/core/cpu.h | 1 + 4 files changed, 82 insertions(+), 44 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 5666a8e23a..3671846744 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1107,6 +1107,24 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx, env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; } +static inline void tlb_set_compare(CPUTLBEntryFull *full, CPUTLBEntry *ent, + target_ulong address, int flags, + MMUAccessType access_type, bool enable) +{ + if (enable) { + address |= flags & TLB_FLAGS_MASK; + flags &= TLB_SLOW_FLAGS_MASK; + if (flags) { + address |= TLB_FORCE_SLOW; + } + } else { + address = -1; + flags = 0; + } + ent->addr_idx[access_type] = address; + full->slow_flags[access_type] = flags; +} + /* * Add a new TLB entry. At most one entry for a given virtual address * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the @@ -1122,9 +1140,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, CPUTLB *tlb = env_tlb(env); CPUTLBDesc *desc = &tlb->d[mmu_idx]; MemoryRegionSection *section; - unsigned int index; - vaddr address; - vaddr write_address; + unsigned int index, read_flags, write_flags; uintptr_t addend; CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; @@ -1153,13 +1169,13 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, " prot=%x idx=%d\n", addr, full->phys_addr, prot, mmu_idx); - address = addr_page; + read_flags = 0; if (full->lg_page_size < TARGET_PAGE_BITS) { /* Repeat the MMU check and TLB fill on every access. */ - address |= TLB_INVALID_MASK; + read_flags |= TLB_INVALID_MASK; } if (full->attrs.byte_swap) { - address |= TLB_BSWAP; + read_flags |= TLB_BSWAP; } is_ram = memory_region_is_ram(section->mr); @@ -1173,7 +1189,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, addend = 0; } - write_address = address; + write_flags = read_flags; if (is_ram) { iotlb = memory_region_get_ram_addr(section->mr) + xlat; /* @@ -1182,9 +1198,9 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, */ if (prot & PAGE_WRITE) { if (section->readonly) { - write_address |= TLB_DISCARD_WRITE; + write_flags |= TLB_DISCARD_WRITE; } else if (cpu_physical_memory_is_clean(iotlb)) { - write_address |= TLB_NOTDIRTY; + write_flags |= TLB_NOTDIRTY; } } } else { @@ -1195,9 +1211,9 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, * Reads to romd devices go through the ram_ptr found above, * but of course reads to I/O must go through MMIO. */ - write_address |= TLB_MMIO; + write_flags |= TLB_MMIO; if (!is_romd) { - address = write_address; + read_flags = write_flags; } } @@ -1242,7 +1258,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, * TARGET_PAGE_BITS, and either * + the ram_addr_t of the page base of the target RAM (RAM) * + the offset within section->mr of the page base (I/O, ROMD) - * We subtract the vaddr_page (which is page aligned and thus won't + * We subtract addr_page (which is page aligned and thus won't * disturb the low bits) to give an offset which can be added to the * (non-page-aligned) vaddr of the eventual memory access to get * the MemoryRegion offset for the access. Note that the vaddr we @@ -1250,36 +1266,30 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). */ desc->fulltlb[index] = *full; - desc->fulltlb[index].xlat_section = iotlb - addr_page; - desc->fulltlb[index].phys_addr = paddr_page; + full = &desc->fulltlb[index]; + full->xlat_section = iotlb - addr_page; + full->phys_addr = paddr_page; /* Now calculate the new entry */ tn.addend = addend - addr_page; - if (prot & PAGE_READ) { - tn.addr_read = address; - if (wp_flags & BP_MEM_READ) { - tn.addr_read |= TLB_WATCHPOINT; - } - } else { - tn.addr_read = -1; - } - if (prot & PAGE_EXEC) { - tn.addr_code = address; - } else { - tn.addr_code = -1; - } + tlb_set_compare(full, &tn, addr_page, read_flags, + MMU_INST_FETCH, prot & PAGE_EXEC); - tn.addr_write = -1; - if (prot & PAGE_WRITE) { - tn.addr_write = write_address; - if (prot & PAGE_WRITE_INV) { - tn.addr_write |= TLB_INVALID_MASK; - } - if (wp_flags & BP_MEM_WRITE) { - tn.addr_write |= TLB_WATCHPOINT; - } + if (wp_flags & BP_MEM_READ) { + read_flags |= TLB_WATCHPOINT; } + tlb_set_compare(full, &tn, addr_page, read_flags, + MMU_DATA_LOAD, prot & PAGE_READ); + + if (prot & PAGE_WRITE_INV) { + write_flags |= TLB_INVALID_MASK; + } + if (wp_flags & BP_MEM_WRITE) { + write_flags |= TLB_WATCHPOINT; + } + tlb_set_compare(full, &tn, addr_page, write_flags, + MMU_DATA_STORE, prot & PAGE_WRITE); copy_tlb_helper_locked(te, &tn); tlb_n_used_entries_inc(env, mmu_idx); @@ -1509,7 +1519,8 @@ static int probe_access_internal(CPUArchState *env, vaddr addr, CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); uint64_t tlb_addr = tlb_read_idx(entry, access_type); vaddr page_addr = addr & TARGET_PAGE_MASK; - int flags = TLB_FLAGS_MASK; + int flags = TLB_FLAGS_MASK & ~TLB_FORCE_SLOW; + CPUTLBEntryFull *full; if (!tlb_hit_page(tlb_addr, page_addr)) { if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) { @@ -1538,7 +1549,8 @@ static int probe_access_internal(CPUArchState *env, vaddr addr, } flags &= tlb_addr; - *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + *pfull = full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + flags |= full->slow_flags[access_type]; /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { @@ -1761,6 +1773,8 @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); uint64_t tlb_addr = tlb_read_idx(entry, access_type); bool maybe_resized = false; + CPUTLBEntryFull *full; + int flags; /* If the TLB entry is for a different page, reload and try again. */ if (!tlb_hit(tlb_addr, addr)) { @@ -1774,8 +1788,12 @@ static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data, tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK; } - data->flags = tlb_addr & TLB_FLAGS_MASK; - data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; + flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW); + flags |= full->slow_flags[access_type]; + + data->full = full; + data->flags = flags; /* Compute haddr speculatively; depending on flags it might be invalid. */ data->haddr = (void *)((uintptr_t)addr + entry->addend); diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 09bf4c0cc6..4422f4bb07 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -327,17 +327,30 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) /* Set if TLB entry contains a watchpoint. */ #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) -/* Set if TLB entry requires byte swap. */ -#define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) +/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */ +#define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5)) /* Set if TLB entry writes ignored. */ #define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) -/* Use this mask to check interception with an alignment mask +/* + * Use this mask to check interception with an alignment mask * in a TCG backend. */ #define TLB_FLAGS_MASK \ (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ - | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE) + | TLB_WATCHPOINT | TLB_FORCE_SLOW | TLB_DISCARD_WRITE) + +/* + * Flags stored in CPUTLBEntryFull.slow_flags[x]. + * TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x]. + */ +/* Set if TLB entry requires byte swap. */ +#define TLB_BSWAP (1 << 0) + +#define TLB_SLOW_FLAGS_MASK TLB_BSWAP + +/* The two sets of flags must not overlap. */ +QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK); /** * tlb_hit_page: return true if page aligned @addr is a hit against the diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index e6a079402e..fb4c8d480f 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -124,6 +124,12 @@ typedef struct CPUTLBEntryFull { /* @lg_page_size contains the log2 of the page size. */ uint8_t lg_page_size; + /* + * Additional tlb flags for use by the slow path. If non-zero, + * the corresponding CPUTLBEntry comparator must have TLB_FORCE_SLOW. + */ + uint8_t slow_flags[MMU_ACCESS_COUNT]; + /* * Allow target-specific additions to this structure. * This may be used to cache items from the guest cpu diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 3b765beb9b..eda0230a02 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -84,6 +84,7 @@ typedef enum MMUAccessType { MMU_DATA_LOAD = 0, MMU_DATA_STORE = 1, MMU_INST_FETCH = 2 +#define MMU_ACCESS_COUNT 3 } MMUAccessType; typedef struct CPUWatchpoint CPUWatchpoint; From 187ba6945345cedf2f343fcc33e5616186aebe9d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 22 Feb 2023 23:09:43 -1000 Subject: [PATCH 21/22] accel/tcg: Move TLB_WATCHPOINT to TLB_SLOW_FLAGS_MASK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This frees up one bit of the primary tlb flags without impacting the TLB_NOTDIRTY logic. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 18 ++++++++++++++---- include/exec/cpu-all.h | 8 ++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 3671846744..5b51eff5a4 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1981,7 +1981,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, */ goto stop_the_world; } - /* Collect TLB_WATCHPOINT for read. */ + /* Collect tlb flags for read. */ tlb_addr |= tlbe->addr_read; /* Notice an IO access or a needs-MMU-lookup access */ @@ -1998,9 +1998,19 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, notdirty_write(env_cpu(env), addr, size, full, retaddr); } - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, - BP_MEM_READ | BP_MEM_WRITE, retaddr); + if (unlikely(tlb_addr & TLB_FORCE_SLOW)) { + int wp_flags = 0; + + if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) { + wp_flags |= BP_MEM_WRITE; + } + if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) { + wp_flags |= BP_MEM_READ; + } + if (wp_flags) { + cpu_check_watchpoint(env_cpu(env), addr, size, + full->attrs, wp_flags, retaddr); + } } return hostaddr; diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 4422f4bb07..b5618613cc 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -325,8 +325,6 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) /* Set if TLB entry is an IO callback. */ #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) -/* Set if TLB entry contains a watchpoint. */ -#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */ #define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5)) /* Set if TLB entry writes ignored. */ @@ -338,7 +336,7 @@ CPUArchState *cpu_copy(CPUArchState *env); */ #define TLB_FLAGS_MASK \ (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ - | TLB_WATCHPOINT | TLB_FORCE_SLOW | TLB_DISCARD_WRITE) + | TLB_FORCE_SLOW | TLB_DISCARD_WRITE) /* * Flags stored in CPUTLBEntryFull.slow_flags[x]. @@ -346,8 +344,10 @@ CPUArchState *cpu_copy(CPUArchState *env); */ /* Set if TLB entry requires byte swap. */ #define TLB_BSWAP (1 << 0) +/* Set if TLB entry contains a watchpoint. */ +#define TLB_WATCHPOINT (1 << 1) -#define TLB_SLOW_FLAGS_MASK TLB_BSWAP +#define TLB_SLOW_FLAGS_MASK (TLB_BSWAP | TLB_WATCHPOINT) /* The two sets of flags must not overlap. */ QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK); From a0eaae08c7c6a59c185cf646b02f4167b2ac6ec0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 21 Jun 2023 12:27:27 +0200 Subject: [PATCH 22/22] accel/tcg: Renumber TLB_DISCARD_WRITE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move to fill a hole in the set of bits. Reduce the total number of tlb bits by 1. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/exec/cpu-all.h | 4 ++-- tcg/tcg-op-ldst.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index b5618613cc..8018ce783e 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -325,10 +325,10 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) /* Set if TLB entry is an IO callback. */ #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) +/* Set if TLB entry writes ignored. */ +#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */ #define TLB_FORCE_SLOW (1 << (TARGET_PAGE_BITS_MIN - 5)) -/* Set if TLB entry writes ignored. */ -#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) /* * Use this mask to check interception with an alignment mask diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index a4f51bfb6e..0fcc1618e5 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -39,7 +39,7 @@ static void check_max_alignment(unsigned a_bits) * The requested alignment cannot overlap the TLB flags. * FIXME: Must keep the count up-to-date with "exec/cpu-all.h". */ - tcg_debug_assert(a_bits + 6 <= tcg_ctx->page_bits); + tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits); #endif }