Merge tag 'pull-tcg-20230511-2' of https://gitlab.com/rth7680/qemu into staging

target/m68k: Fix gen_load_fp regression
accel/tcg: Ensure fairness with icount
disas: Move disas.c into the target-independent source sets
tcg: Use common routines for calling slow path helpers
tcg/*: Cleanups to qemu_ld/st constraints
tcg: Remove TARGET_ALIGNED_ONLY
accel/tcg: Reorg system mode load/store helpers

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRcxtYdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9arQf8Di7CnMQE/jW+8w6v
# 5af0dX8/St2JnCXzG+qiW6mJm50Cy4GunCN66JcCAswpENvQLLsJP13c+4KTeB1T
# rGBbedFXTw1LsaoOcBvwhq7RTIROz4GESTS4EZoJMlMhMv0VotekUPPz4NFMZRKX
# LMvShM2C+f2p4HmDnnbki7M3+tMqpgoGCeBFX8Jy7/5sbpS/7ceXRio3ZRAhasPu
# vjA0zqUtoTs7ijKpXf3uRl/c7xql+f0d7SDdCRt4OKasfLCCDwkjtMf6plZ2jzuS
# OgwKc5N1jaMF6erHYZJIbfLLdUl20/JJEcbpU3Eh1XuHnzn1msS9JDOm2tvzwsto
# OpOKUg==
# =Lhy3
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 11 May 2023 11:43:34 AM BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230511-2' of https://gitlab.com/rth7680/qemu: (53 commits)
  target/loongarch: Do not include tcg-ldst.h
  accel/tcg: Reorg system mode store helpers
  accel/tcg: Reorg system mode load helpers
  accel/tcg: Introduce tlb_read_idx
  accel/tcg: Add cpu_in_serial_context
  tcg: Remove TARGET_ALIGNED_ONLY
  target/sh4: Remove TARGET_ALIGNED_ONLY
  target/sh4: Use MO_ALIGN where required
  target/nios2: Remove TARGET_ALIGNED_ONLY
  target/mips: Remove TARGET_ALIGNED_ONLY
  target/mips: Use MO_ALIGN instead of 0
  target/mips: Add missing default_tcg_memop_mask
  target/mips: Add MO_ALIGN to gen_llwp, gen_scwp
  tcg/s390x: Simplify constraints on qemu_ld/st
  tcg/s390x: Use ALGFR in constructing softmmu host address
  tcg/riscv: Simplify constraints on qemu_ld/st
  tcg/ppc: Remove unused constraint J
  tcg/ppc: Remove unused constraints A, B, C, D
  tcg/ppc: Adjust constraints on qemu_ld/st
  tcg/ppc: Reorg tcg_out_tlb_read
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Commit 278238505d by Richard Henderson, 2023-05-11 11:44:23 +01:00
68 changed files with 2789 additions and 3040 deletions

View file

@ -22,6 +22,7 @@
#include "sysemu/tcg.h"
#include "exec/exec-all.h"
#include "qemu/plugin.h"
#include "internal.h"
bool tcg_allowed;
@ -81,6 +82,8 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
{
/* Prevent looping if already executing in a serial context. */
g_assert(!cpu_in_serial_context(cpu));
cpu->exception_index = EXCP_ATOMIC;
cpu_loop_exit_restore(cpu, pc);
}

File diff suppressed because it is too large.

View file

@ -64,6 +64,15 @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
}
}
/*
* Return true if CS is not running in parallel with other cpus, either
* because there are no other cpus or we are within an exclusive context.
*/
static inline bool cpu_in_serial_context(CPUState *cs)
{
return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
}
extern int64_t max_delay;
extern int64_t max_advance;

View file

@ -760,7 +760,7 @@ void tb_flush(CPUState *cpu)
if (tcg_enabled()) {
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
if (cpu_in_exclusive_context(cpu)) {
if (cpu_in_serial_context(cpu)) {
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
} else {
async_safe_run_on_cpu(cpu, do_tb_flush,

View file

@ -89,7 +89,20 @@ void icount_handle_deadline(void)
}
}
void icount_prepare_for_run(CPUState *cpu)
/* Distribute the budget evenly across all CPUs */
int64_t icount_percpu_budget(int cpu_count)
{
int64_t limit = icount_get_limit();
int64_t timeslice = limit / cpu_count;
if (timeslice == 0) {
timeslice = limit;
}
return timeslice;
}
void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
{
int insns_left;
@ -101,13 +114,13 @@ void icount_prepare_for_run(CPUState *cpu)
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
g_assert(cpu->icount_extra == 0);
cpu->icount_budget = icount_get_limit();
replay_mutex_lock();
cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
insns_left = MIN(0xffff, cpu->icount_budget);
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
replay_mutex_lock();
if (cpu->icount_budget == 0) {
/*
* We're called without the iothread lock, so must take it while
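
The fairness change above comes down to a small piece of arithmetic. Below is a minimal standalone sketch of it, for illustration only (plain C, no QEMU dependencies; percpu_budget is a made-up name mirroring icount_percpu_budget above). As the hunk shows, icount_prepare_for_run() then clamps the actual budget with MIN(icount_get_limit(), cpu_budget).

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as icount_percpu_budget() above: split the deadline-derived
 * limit evenly across CPUs, falling back to the full limit when the per-CPU
 * share would round down to zero. */
static int64_t percpu_budget(int64_t limit, int cpu_count)
{
    int64_t timeslice = limit / cpu_count;
    return timeslice == 0 ? limit : timeslice;
}

int main(void)
{
    /* 1000 budgeted instructions over 3 vCPUs: 333 each per round. */
    printf("%lld\n", (long long)percpu_budget(1000, 3));
    /* A tiny limit of 2 over 3 vCPUs would give 0, so each CPU gets all 2. */
    printf("%lld\n", (long long)percpu_budget(2, 3));
    return 0;
}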

View file

@ -11,7 +11,8 @@
#define TCG_ACCEL_OPS_ICOUNT_H
void icount_handle_deadline(void);
void icount_prepare_for_run(CPUState *cpu);
void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
int64_t icount_percpu_budget(int cpu_count);
void icount_process_data(CPUState *cpu);
void icount_handle_interrupt(CPUState *cpu, int mask);

View file

@ -24,6 +24,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/lockable.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "sysemu/cpu-timers.h"
@ -139,6 +140,33 @@ static void rr_force_rcu(Notifier *notify, void *data)
rr_kick_next_cpu();
}
/*
* Calculate the number of CPUs that we will process in a single iteration of
* the main CPU thread loop so that we can fairly distribute the instruction
* count across CPUs.
*
* The CPU count is cached based on the CPU list generation ID to avoid
* iterating the list every time.
*/
static int rr_cpu_count(void)
{
static unsigned int last_gen_id = ~0;
static int cpu_count;
CPUState *cpu;
QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
if (cpu_list_generation_id_get() != last_gen_id) {
cpu_count = 0;
CPU_FOREACH(cpu) {
++cpu_count;
}
last_gen_id = cpu_list_generation_id_get();
}
return cpu_count;
}
/*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
@ -185,11 +213,16 @@ static void *rr_cpu_thread_fn(void *arg)
cpu->exit_request = 1;
while (1) {
/* Only used for icount_enabled() */
int64_t cpu_budget = 0;
qemu_mutex_unlock_iothread();
replay_mutex_lock();
qemu_mutex_lock_iothread();
if (icount_enabled()) {
int cpu_count = rr_cpu_count();
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
icount_account_warp_timer();
/*
@ -197,6 +230,8 @@ static void *rr_cpu_thread_fn(void *arg)
* waking up the I/O thread and waiting for completion.
*/
icount_handle_deadline();
cpu_budget = icount_percpu_budget(cpu_count);
}
replay_mutex_unlock();
@ -218,7 +253,7 @@ static void *rr_cpu_thread_fn(void *arg)
qemu_mutex_unlock_iothread();
if (icount_enabled()) {
icount_prepare_for_run(cpu);
icount_prepare_for_run(cpu, cpu_budget);
}
r = tcg_cpus_exec(cpu);
if (icount_enabled()) {
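
rr_cpu_count() above caches its result behind a generation counter. The sketch below shows the same idea in isolation; list_generation, list_length and cached_count are illustrative stand-ins for cpu_list_generation_id_get(), CPU_FOREACH and rr_cpu_count, and the qemu_cpu_list_lock locking is omitted.

#include <stdio.h>

static unsigned int list_generation = 1;  /* bumped whenever the list changes */
static int list_length = 4;               /* what the counting loop would see */

static int cached_count(void)
{
    static unsigned int last_gen_id = ~0u;
    static int count;

    if (list_generation != last_gen_id) {
        count = list_length;              /* stands in for the CPU_FOREACH loop */
        last_gen_id = list_generation;
    }
    return count;
}

int main(void)
{
    printf("%d\n", cached_count());       /* first call counts: 4 */
    printf("%d\n", cached_count());       /* same generation: cached 4 */
    list_length = 5;
    list_generation = 2;                  /* list changed, ID bumped */
    printf("%d\n", cached_count());       /* recounts: 5 */
    return 0;
}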

View file

@ -352,9 +352,10 @@ static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex,
static int symfind(const void *s0, const void *s1)
{
target_ulong addr = *(target_ulong *)s0;
struct elf_sym *sym = (struct elf_sym *)s1;
__typeof(sym->st_value) addr = *(uint64_t *)s0;
int result = 0;
if (addr < sym->st_value) {
result = -1;
} else if (addr >= sym->st_value + sym->st_size) {
@ -363,7 +364,7 @@ static int symfind(const void *s0, const void *s1)
return result;
}
static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
{
#if ELF_CLASS == ELFCLASS32
struct elf_sym *syms = s->disas_symtab.elf32;
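
symfind() above is a bsearch() comparator whose key is now read as a plain uint64_t rather than a target_ulong, so the same code serves 32-bit and 64-bit targets. A standalone sketch of that pattern follows, with a simplified symbol struct and made-up values:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct sym { uint64_t st_value; uint64_t st_size; const char *name; };

/* Key on the left, table element on the right, as in the hunk above. */
static int symfind(const void *s0, const void *s1)
{
    const struct sym *sym = s1;
    uint64_t addr = *(const uint64_t *)s0;

    if (addr < sym->st_value) {
        return -1;
    } else if (addr >= sym->st_value + sym->st_size) {
        return 1;
    }
    return 0;
}

int main(void)
{
    struct sym syms[] = {           /* must be sorted by st_value */
        { 0x1000, 0x100, "start" },
        { 0x2000, 0x080, "main"  },
    };
    uint64_t key = 0x2010;
    struct sym *hit = bsearch(&key, syms, 2, sizeof(syms[0]), symfind);
    printf("%s\n", hit ? hit->name : "?");   /* prints: main */
    return 0;
}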

View file

@ -2,5 +2,4 @@ TARGET_ARCH=mips
TARGET_ABI_MIPSO32=y
TARGET_SYSTBL_ABI=o32
TARGET_SYSTBL=syscall_o32.tbl
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y

View file

@ -1,4 +1,3 @@
TARGET_ARCH=mips
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
TARGET_SUPPORTS_MTTCG=y

View file

@ -3,5 +3,4 @@ TARGET_ABI_MIPSN64=y
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n64
TARGET_SYSTBL=syscall_n64.tbl
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y

View file

@ -1,4 +1,3 @@
TARGET_ARCH=mips64
TARGET_BASE_ARCH=mips
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y

View file

@ -3,4 +3,3 @@ TARGET_ABI_MIPSN64=y
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n64
TARGET_SYSTBL=syscall_n64.tbl
TARGET_ALIGNED_ONLY=y

View file

@ -1,4 +1,3 @@
TARGET_ARCH=mips64
TARGET_BASE_ARCH=mips
TARGET_ALIGNED_ONLY=y
TARGET_NEED_FDT=y

View file

@ -2,4 +2,3 @@ TARGET_ARCH=mips
TARGET_ABI_MIPSO32=y
TARGET_SYSTBL_ABI=o32
TARGET_SYSTBL=syscall_o32.tbl
TARGET_ALIGNED_ONLY=y

View file

@ -1,3 +1,2 @@
TARGET_ARCH=mips
TARGET_ALIGNED_ONLY=y
TARGET_SUPPORTS_MTTCG=y

View file

@ -4,5 +4,4 @@ TARGET_ABI32=y
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n32
TARGET_SYSTBL=syscall_n32.tbl
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y

View file

@ -4,4 +4,3 @@ TARGET_ABI32=y
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n32
TARGET_SYSTBL=syscall_n32.tbl
TARGET_ALIGNED_ONLY=y

View file

@ -1,3 +1,2 @@
TARGET_ARCH=nios2
TARGET_ALIGNED_ONLY=y
TARGET_NEED_FDT=y

View file

@ -1,5 +1,4 @@
TARGET_ARCH=sh4
TARGET_SYSTBL_ABI=common
TARGET_SYSTBL=syscall.tbl
TARGET_ALIGNED_ONLY=y
TARGET_HAS_BFLT=y

View file

@ -1,2 +1 @@
TARGET_ARCH=sh4
TARGET_ALIGNED_ONLY=y

View file

@ -1,6 +1,5 @@
TARGET_ARCH=sh4
TARGET_SYSTBL_ABI=common
TARGET_SYSTBL=syscall.tbl
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
TARGET_HAS_BFLT=y

View file

@ -1,3 +1,2 @@
TARGET_ARCH=sh4
TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y

View file

@ -25,7 +25,7 @@
#include "qemu/lockable.h"
#include "trace/trace-root.h"
static QemuMutex qemu_cpu_list_lock;
QemuMutex qemu_cpu_list_lock;
static QemuCond exclusive_cond;
static QemuCond exclusive_resume;
static QemuCond qemu_work_cond;

disas/disas-internal.h (new file, 21 lines)
View file

@ -0,0 +1,21 @@
/*
* Definitions used internally in the disassembly code
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef DISAS_INTERNAL_H
#define DISAS_INTERNAL_H
#include "disas/dis-asm.h"
typedef struct CPUDebug {
struct disassemble_info info;
CPUState *cpu;
} CPUDebug;
void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu);
int disas_gstring_printf(FILE *stream, const char *fmt, ...)
G_GNUC_PRINTF(2, 3);
#endif

disas/disas-mon.c (new file, 65 lines)
View file

@ -0,0 +1,65 @@
/*
* Functions related to disassembly from the monitor
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "disas-internal.h"
#include "disas/disas.h"
#include "exec/memory.h"
#include "hw/core/cpu.h"
#include "monitor/monitor.h"
static int
physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
struct disassemble_info *info)
{
CPUDebug *s = container_of(info, CPUDebug, info);
MemTxResult res;
res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
myaddr, length);
return res == MEMTX_OK ? 0 : EIO;
}
/* Disassembler for the monitor. */
void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
int nb_insn, bool is_physical)
{
int count, i;
CPUDebug s;
g_autoptr(GString) ds = g_string_new("");
disas_initialize_debug_target(&s, cpu);
s.info.fprintf_func = disas_gstring_printf;
s.info.stream = (FILE *)ds; /* abuse this slot */
if (is_physical) {
s.info.read_memory_func = physical_read_memory;
}
s.info.buffer_vma = pc;
if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
monitor_puts(mon, ds->str);
return;
}
if (!s.info.print_insn) {
monitor_printf(mon, "0x%08" PRIx64
": Asm output not supported on this arch\n", pc);
return;
}
for (i = 0; i < nb_insn; i++) {
g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
count = s.info.print_insn(pc, &s.info);
g_string_append_c(ds, '\n');
if (count < 0) {
break;
}
pc += count;
}
monitor_puts(mon, ds->str);
}

View file

@ -1,16 +1,12 @@
/* General "disassemble this chunk" code. Used for debugging. */
#include "qemu/osdep.h"
#include "disas/dis-asm.h"
#include "disas/disas-internal.h"
#include "elf.h"
#include "qemu/qemu-print.h"
#include "disas/disas.h"
#include "disas/capstone.h"
typedef struct CPUDebug {
struct disassemble_info info;
CPUState *cpu;
} CPUDebug;
#include "hw/core/cpu.h"
#include "exec/memory.h"
/* Filled in by elfload.c. Simplistic, but will do for now. */
struct syminfo *syminfos = NULL;
@ -119,18 +115,18 @@ static void initialize_debug(CPUDebug *s)
s->info.symbol_at_address_func = symbol_at_address;
}
static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
{
initialize_debug(s);
s->cpu = cpu;
s->info.read_memory_func = target_read_memory;
s->info.print_address_func = print_address;
#if TARGET_BIG_ENDIAN
s->info.endian = BFD_ENDIAN_BIG;
#else
s->info.endian = BFD_ENDIAN_LITTLE;
#endif
if (target_words_bigendian()) {
s->info.endian = BFD_ENDIAN_BIG;
} else {
s->info.endian = BFD_ENDIAN_LITTLE;
}
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->disas_set_info) {
@ -168,7 +164,7 @@ static void initialize_debug_host(CPUDebug *s)
# ifdef _ARCH_PPC64
s->info.cap_mode = CS_MODE_64;
# endif
#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
#elif defined(__riscv)
#if defined(_ILP32) || (__riscv_xlen == 32)
s->info.print_insn = print_insn_riscv32;
#elif defined(_LP64)
@ -204,14 +200,13 @@ static void initialize_debug_host(CPUDebug *s)
}
/* Disassemble this for me please... (debugging). */
void target_disas(FILE *out, CPUState *cpu, target_ulong code,
target_ulong size)
void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
{
target_ulong pc;
uint64_t pc;
int count;
CPUDebug s;
initialize_debug_target(&s, cpu);
disas_initialize_debug_target(&s, cpu);
s.info.fprintf_func = fprintf;
s.info.stream = out;
s.info.buffer_vma = code;
@ -226,11 +221,12 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
}
for (pc = code; size > 0; pc += count, size -= count) {
fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
count = s.info.print_insn(pc, &s.info);
fprintf(out, "\n");
if (count < 0)
break;
fprintf(out, "0x%08" PRIx64 ": ", pc);
count = s.info.print_insn(pc, &s.info);
fprintf(out, "\n");
if (count < 0) {
break;
}
if (size < count) {
fprintf(out,
"Disassembler disagrees with translator over instruction "
@ -241,8 +237,7 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
}
}
static int G_GNUC_PRINTF(2, 3)
gstring_printf(FILE *stream, const char *fmt, ...)
int disas_gstring_printf(FILE *stream, const char *fmt, ...)
{
/* We abuse the FILE parameter to pass a GString. */
GString *s = (GString *)stream;
@ -272,8 +267,8 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
CPUDebug s;
GString *ds = g_string_new(NULL);
initialize_debug_target(&s, cpu);
s.info.fprintf_func = gstring_printf;
disas_initialize_debug_target(&s, cpu);
s.info.fprintf_func = disas_gstring_printf;
s.info.stream = (FILE *)ds; /* abuse this slot */
s.info.buffer_vma = addr;
s.info.buffer_length = size;
@ -292,7 +287,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
}
/* Disassemble this for me please... (debugging). */
void disas(FILE *out, const void *code, unsigned long size)
void disas(FILE *out, const void *code, size_t size)
{
uintptr_t pc;
int count;
@ -324,7 +319,7 @@ void disas(FILE *out, const void *code, unsigned long size)
}
/* Look up symbol for debugging purpose. Returns "" if unknown. */
const char *lookup_symbol(target_ulong orig_addr)
const char *lookup_symbol(uint64_t orig_addr)
{
const char *symbol = "";
struct syminfo *s;
@ -338,61 +333,3 @@ const char *lookup_symbol(target_ulong orig_addr)
return symbol;
}
#if !defined(CONFIG_USER_ONLY)
#include "monitor/monitor.h"
static int
physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
struct disassemble_info *info)
{
CPUDebug *s = container_of(info, CPUDebug, info);
MemTxResult res;
res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
myaddr, length);
return res == MEMTX_OK ? 0 : EIO;
}
/* Disassembler for the monitor. */
void monitor_disas(Monitor *mon, CPUState *cpu,
target_ulong pc, int nb_insn, int is_physical)
{
int count, i;
CPUDebug s;
g_autoptr(GString) ds = g_string_new("");
initialize_debug_target(&s, cpu);
s.info.fprintf_func = gstring_printf;
s.info.stream = (FILE *)ds; /* abuse this slot */
if (is_physical) {
s.info.read_memory_func = physical_read_memory;
}
s.info.buffer_vma = pc;
if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
monitor_puts(mon, ds->str);
return;
}
if (!s.info.print_insn) {
monitor_printf(mon, "0x" TARGET_FMT_lx
": Asm output not supported on this arch\n", pc);
return;
}
for (i = 0; i < nb_insn; i++) {
g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc);
count = s.info.print_insn(pc, &s.info);
g_string_append_c(ds, '\n');
if (count < 0) {
break;
}
pc += count;
}
monitor_puts(mon, ds->str);
}
#endif

View file

@ -10,4 +10,8 @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c'))
common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
common_ss.add(when: capstone, if_true: files('capstone.c'))
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
common_ss.add(files('disas.c'))
softmmu_ss.add(files('disas-mon.c'))
specific_ss.add(capstone)

View file

@ -1,34 +1,23 @@
#ifndef QEMU_DISAS_H
#define QEMU_DISAS_H
#include "exec/hwaddr.h"
#ifdef NEED_CPU_H
#include "cpu.h"
/* Disassemble this for me please... (debugging). */
void disas(FILE *out, const void *code, unsigned long size);
void target_disas(FILE *out, CPUState *cpu, target_ulong code,
target_ulong size);
void disas(FILE *out, const void *code, size_t size);
void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
void monitor_disas(Monitor *mon, CPUState *cpu,
target_ulong pc, int nb_insn, int is_physical);
void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
int nb_insn, bool is_physical);
char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
/* Look up symbol for debugging purpose. Returns "" if unknown. */
const char *lookup_symbol(target_ulong orig_addr);
#endif
const char *lookup_symbol(uint64_t orig_addr);
struct syminfo;
struct elf32_sym;
struct elf64_sym;
#if defined(CONFIG_USER_ONLY)
typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
#else
typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
#endif
typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
struct syminfo {
lookup_symbol_t lookup_symbol;

View file

@ -32,6 +32,7 @@ extern intptr_t qemu_host_page_mask;
#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size())
/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
extern QemuMutex qemu_cpu_list_lock;
void qemu_init_cpu_list(void);
void cpu_list_lock(void);
void cpu_list_unlock(void);

View file

@ -111,8 +111,11 @@ typedef struct CPUTLBEntry {
use the corresponding iotlb value. */
uintptr_t addend;
};
/* padding to get a power of two size */
uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
/*
* Padding to get a power of two size, as well as index
* access to addr_{read,write,code}.
*/
target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE];
};
} CPUTLBEntry;

View file

@ -360,13 +360,29 @@ static inline void clear_helper_retaddr(void)
/* Needed for TCG_OVERSIZED_GUEST */
#include "tcg/tcg.h"
static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
MMUAccessType access_type)
{
/* Do not rearrange the CPUTLBEntry structure members. */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
MMU_DATA_LOAD * TARGET_LONG_SIZE);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
MMU_DATA_STORE * TARGET_LONG_SIZE);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
MMU_INST_FETCH * TARGET_LONG_SIZE);
const target_ulong *ptr = &entry->addr_idx[access_type];
#if TCG_OVERSIZED_GUEST
return *ptr;
#else
/* ofs might correspond to .addr_write, so use qatomic_read */
return qatomic_read(ptr);
#endif
}
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
{
#if TCG_OVERSIZED_GUEST
return entry->addr_write;
#else
return qatomic_read(&entry->addr_write);
#endif
return tlb_read_idx(entry, MMU_DATA_STORE);
}
/* Find the TLB index corresponding to the mmu_idx + address pair. */
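
The union trick behind tlb_read_idx() can be shown in isolation: the named members and a small array alias the same storage, so the access type doubles as an array index, with build-time asserts pinning the member order. The self-contained sketch below uses simplified names and uint32_t instead of target_ulong, and leaves out the qatomic_read detail of the real helper.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef enum { ACC_LOAD, ACC_STORE, ACC_FETCH } AccessType;

typedef struct {
    union {
        struct {
            uint32_t addr_read;
            uint32_t addr_write;
            uint32_t addr_code;
            uint32_t addend;
        };
        uint32_t addr_idx[4];   /* index access to addr_{read,write,code} */
    };
} Entry;

static uint32_t read_idx(const Entry *e, AccessType type)
{
    /* Do not rearrange the members: the index must match the field order. */
    _Static_assert(offsetof(Entry, addr_write) == ACC_STORE * sizeof(uint32_t),
                   "layout");
    return e->addr_idx[type];
}

int main(void)
{
    Entry e;
    e.addr_read = 1;
    e.addr_write = 2;
    e.addr_code = 3;
    e.addend = 4;
    printf("%u %u\n", read_idx(&e, ACC_LOAD), read_idx(&e, ACC_STORE)); /* 1 2 */
    return 0;
}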

View file

@ -47,8 +47,6 @@ typedef enum MemOp {
* MO_UNALN accesses are never checked for alignment.
* MO_ALIGN accesses will result in a call to the CPU's
* do_unaligned_access hook if the guest address is not aligned.
* The default depends on whether the target CPU defines
* TARGET_ALIGNED_ONLY.
*
* Some architectures (e.g. ARMv8) need the address which is aligned
* to a size more than the size of the memory access.
@ -65,21 +63,14 @@ typedef enum MemOp {
*/
MO_ASHIFT = 5,
MO_AMASK = 0x7 << MO_ASHIFT,
#ifdef NEED_CPU_H
#ifdef TARGET_ALIGNED_ONLY
MO_ALIGN = 0,
MO_UNALN = MO_AMASK,
#else
MO_ALIGN = MO_AMASK,
MO_UNALN = 0,
#endif
#endif
MO_UNALN = 0,
MO_ALIGN_2 = 1 << MO_ASHIFT,
MO_ALIGN_4 = 2 << MO_ASHIFT,
MO_ALIGN_8 = 3 << MO_ASHIFT,
MO_ALIGN_16 = 4 << MO_ASHIFT,
MO_ALIGN_32 = 5 << MO_ASHIFT,
MO_ALIGN_64 = 6 << MO_ASHIFT,
MO_ALIGN = MO_AMASK,
/* Combinations of the above, for ease of use. */
MO_UB = MO_8,
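
With TARGET_ALIGNED_ONLY gone, MO_UNALN is always 0 and MO_ALIGN is always the all-ones alignment field, so every front end now states its alignment requirement explicitly (see the sh4, mips and nios2 hunks below). The standalone snippet below uses only the constants from this hunk; describe_align() is an illustrative decoder in the spirit of, but not identical to, QEMU's get_alignment_bits().

#include <stdio.h>

enum {
    MO_ASHIFT  = 5,
    MO_AMASK   = 0x7 << MO_ASHIFT,
    MO_UNALN   = 0,
    MO_ALIGN_2 = 1 << MO_ASHIFT,
    MO_ALIGN_4 = 2 << MO_ASHIFT,
    MO_ALIGN   = MO_AMASK,
};

static const char *describe_align(int op)
{
    switch (op & MO_AMASK) {
    case MO_UNALN:  return "no alignment check";
    case MO_ALIGN:  return "natural alignment required";
    default:        return "explicit alignment required";
    }
}

int main(void)
{
    printf("%s\n", describe_align(MO_UNALN));     /* no alignment check */
    printf("%s\n", describe_align(MO_ALIGN));     /* natural alignment required */
    printf("%s\n", describe_align(MO_ALIGN_4));   /* explicit alignment required */
    return 0;
}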

View file

@ -35,7 +35,6 @@
#pragma GCC poison TARGET_TRICORE
#pragma GCC poison TARGET_XTENSA
#pragma GCC poison TARGET_ALIGNED_ONLY
#pragma GCC poison TARGET_HAS_BFLT
#pragma GCC poison TARGET_NAME
#pragma GCC poison TARGET_SUPPORTS_MTTCG

View file

@ -17,6 +17,7 @@
#include "qemu/guest-random.h"
#include "qemu/units.h"
#include "qemu/selfmap.h"
#include "qemu/lockable.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "target_signal.h"
@ -3327,9 +3328,10 @@ static void load_elf_interp(const char *filename, struct image_info *info,
static int symfind(const void *s0, const void *s1)
{
target_ulong addr = *(target_ulong *)s0;
struct elf_sym *sym = (struct elf_sym *)s1;
__typeof(sym->st_value) addr = *(uint64_t *)s0;
int result = 0;
if (addr < sym->st_value) {
result = -1;
} else if (addr >= sym->st_value + sym->st_size) {
@ -3338,7 +3340,7 @@ static int symfind(const void *s0, const void *s1)
return result;
}
static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
{
#if ELF_CLASS == ELFCLASS32
struct elf_sym *syms = s->disas_symtab.elf32;
@ -4237,14 +4239,14 @@ static int fill_note_info(struct elf_note_info *info,
info->notes_size += note_size(&info->notes[i]);
/* read and fill status of all threads */
cpu_list_lock();
CPU_FOREACH(cpu) {
if (cpu == thread_cpu) {
continue;
WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
CPU_FOREACH(cpu) {
if (cpu == thread_cpu) {
continue;
}
fill_thread_info(info, cpu->env_ptr);
}
fill_thread_info(info, cpu->env_ptr);
}
cpu_list_unlock();
return (0);
}
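
The conversion above replaces explicit cpu_list_lock()/cpu_list_unlock() pairs with a scoped guard. A simplified, self-contained stand-in for WITH_QEMU_LOCK_GUARD is sketched below; unlike QEMU's real macro, which relies on cleanup attributes, this version only releases the lock when the block falls off the end, not on an early return, goto or break.

#include <pthread.h>
#include <stdio.h>

#define WITH_LOCK(mtx)                                              \
    for (int once_ = (pthread_mutex_lock(mtx), 1); once_;           \
         pthread_mutex_unlock(mtx), once_ = 0)

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int entries;

int main(void)
{
    WITH_LOCK(&list_lock) {
        entries++;                     /* stands in for the CPU_FOREACH body */
        printf("entries=%d\n", entries);
    }                                  /* lock released here */
    return 0;
}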

View file

@ -3153,9 +3153,6 @@ specific_ss.add(files('cpu.c'))
subdir('softmmu')
common_ss.add(capstone)
specific_ss.add(files('disas.c'), capstone)
# Work around a gcc bug/misfeature wherein constant propagation looks
# through an alias:
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696

View file

@ -150,25 +150,25 @@ int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
retry:
init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
cpu_list_lock();
gen_id = cpu_list_generation_id_get();
records = vcpu_dirty_stat_alloc(stat);
vcpu_dirty_stat_collect(stat, records, true);
cpu_list_unlock();
WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
gen_id = cpu_list_generation_id_get();
records = vcpu_dirty_stat_alloc(stat);
vcpu_dirty_stat_collect(stat, records, true);
}
duration = dirty_stat_wait(calc_time_ms, init_time_ms);
global_dirty_log_sync(flag, one_shot);
cpu_list_lock();
if (gen_id != cpu_list_generation_id_get()) {
g_free(records);
g_free(stat->rates);
cpu_list_unlock();
goto retry;
WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
if (gen_id != cpu_list_generation_id_get()) {
g_free(records);
g_free(stat->rates);
cpu_list_unlock();
goto retry;
}
vcpu_dirty_stat_collect(stat, records, false);
}
vcpu_dirty_stat_collect(stat, records, false);
cpu_list_unlock();
for (i = 0; i < stat->nvcpu; i++) {
dirtyrate = do_calculate_dirtyrate(records[i], duration);

View file

@ -74,7 +74,7 @@ uint64_t replay_get_current_icount(void)
int replay_get_instructions(void)
{
int res = 0;
replay_mutex_lock();
g_assert(replay_mutex_locked());
if (replay_next_event_is(EVENT_INSTRUCTION)) {
res = replay_state.instruction_count;
if (replay_break_icount != -1LL) {
@ -85,7 +85,6 @@ int replay_get_instructions(void)
}
}
}
replay_mutex_unlock();
return res;
}

View file

@ -15,7 +15,6 @@
#include "exec/cpu_ldst.h"
#include "hw/irq.h"
#include "cpu-csr.h"
#include "tcg/tcg-ldst.h"
target_ulong helper_csrrd_pgd(CPULoongArchState *env)
{

View file

@ -12,7 +12,6 @@
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "tcg/tcg-ldst.h"
#define GET_MEMTXATTRS(cas) \
((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})

View file

@ -959,6 +959,7 @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
switch (opsize) {
case OS_BYTE:
case OS_WORD:
case OS_LONG:
tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE);
gen_helper_exts32(cpu_env, fp, tmp);
break;

View file

@ -977,20 +977,24 @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
gen_reserved_instruction(ctx);
return;
}
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd);
tcg_gen_movi_tl(t1, 4);
gen_op_addr_add(ctx, t0, t0, t1);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd + 1);
break;
case SWP:
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
ctx->default_tcg_memop_mask);
tcg_gen_movi_tl(t1, 4);
gen_op_addr_add(ctx, t0, t0, t1);
gen_load_gpr(t1, rd + 1);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
ctx->default_tcg_memop_mask);
break;
#ifdef TARGET_MIPS64
case LDP:
@ -998,20 +1002,24 @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
gen_reserved_instruction(ctx);
return;
}
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd);
tcg_gen_movi_tl(t1, 8);
gen_op_addr_add(ctx, t0, t0, t1);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd + 1);
break;
case SDP:
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
ctx->default_tcg_memop_mask);
tcg_gen_movi_tl(t1, 8);
gen_op_addr_add(ctx, t0, t0, t1);
gen_load_gpr(t1, rd + 1);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
ctx->default_tcg_memop_mask);
break;
#endif
}

View file

@ -172,22 +172,26 @@ static void gen_mips16_save(DisasContext *ctx,
case 4:
gen_base_offset_addr(ctx, t0, 29, 12);
gen_load_gpr(t1, 7);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
ctx->default_tcg_memop_mask);
/* Fall through */
case 3:
gen_base_offset_addr(ctx, t0, 29, 8);
gen_load_gpr(t1, 6);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
ctx->default_tcg_memop_mask);
/* Fall through */
case 2:
gen_base_offset_addr(ctx, t0, 29, 4);
gen_load_gpr(t1, 5);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
ctx->default_tcg_memop_mask);
/* Fall through */
case 1:
gen_base_offset_addr(ctx, t0, 29, 0);
gen_load_gpr(t1, 4);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
ctx->default_tcg_memop_mask);
}
gen_load_gpr(t0, 29);
@ -196,7 +200,8 @@ static void gen_mips16_save(DisasContext *ctx,
tcg_gen_movi_tl(t2, -4); \
gen_op_addr_add(ctx, t0, t0, t2); \
gen_load_gpr(t1, reg); \
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \
ctx->default_tcg_memop_mask); \
} while (0)
if (do_ra) {
@ -298,7 +303,8 @@ static void gen_mips16_restore(DisasContext *ctx,
#define DECR_AND_LOAD(reg) do { \
tcg_gen_movi_tl(t2, -4); \
gen_op_addr_add(ctx, t0, t0, t2); \
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \
ctx->default_tcg_memop_mask); \
gen_store_gpr(t1, reg); \
} while (0)

View file

@ -831,7 +831,8 @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
}
tcg_gen_add_tl(t1, t0, t1);
tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP));
tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
ctx->default_tcg_memop_mask);
gen_store_mxu_gpr(t1, XRa);
}

View file

@ -998,7 +998,7 @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset,
TCGv tmp2 = tcg_temp_new();
gen_base_offset_addr(ctx, taddr, base, offset);
tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ);
tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN);
if (cpu_is_bigendian(ctx)) {
tcg_gen_extr_i64_tl(tmp2, tmp1, tval);
} else {
@ -1039,7 +1039,8 @@ static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset,
tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp));
tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval,
eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64);
eva ? MIPS_HFLAG_UM : ctx->mem_idx,
MO_64 | MO_ALIGN);
if (reg1 != 0) {
tcg_gen_movi_tl(cpu_gpr[reg1], 1);
}
@ -2640,52 +2641,49 @@ static void gen_p_lsx(DisasContext *ctx, int rd, int rs, int rt)
switch (extract32(ctx->opcode, 7, 4)) {
case NM_LBX:
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
MO_SB);
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
gen_store_gpr(t0, rd);
break;
case NM_LHX:
/*case NM_LHXS:*/
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
MO_TESW);
MO_TESW | ctx->default_tcg_memop_mask);
gen_store_gpr(t0, rd);
break;
case NM_LWX:
/*case NM_LWXS:*/
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
MO_TESL);
MO_TESL | ctx->default_tcg_memop_mask);
gen_store_gpr(t0, rd);
break;
case NM_LBUX:
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
MO_UB);
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB);
gen_store_gpr(t0, rd);
break;
case NM_LHUX:
/*case NM_LHUXS:*/
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
MO_TEUW);
MO_TEUW | ctx->default_tcg_memop_mask);
gen_store_gpr(t0, rd);
break;
case NM_SBX:
check_nms(ctx);
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
MO_8);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8);
break;
case NM_SHX:
/*case NM_SHXS:*/
check_nms(ctx);
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
MO_TEUW);
MO_TEUW | ctx->default_tcg_memop_mask);
break;
case NM_SWX:
/*case NM_SWXS:*/
check_nms(ctx);
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
MO_TEUL);
MO_TEUL | ctx->default_tcg_memop_mask);
break;
case NM_LWC1X:
/*case NM_LWC1XS:*/
@ -3738,7 +3736,8 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
addr_off);
tcg_gen_movi_tl(t0, addr);
tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL);
tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx,
MO_TESL | ctx->default_tcg_memop_mask);
}
break;
case NM_SWPC48:
@ -3754,7 +3753,8 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
tcg_gen_movi_tl(t0, addr);
gen_load_gpr(t1, rt);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
MO_TEUL | ctx->default_tcg_memop_mask);
}
break;
default:
@ -4305,7 +4305,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
TCGv va = tcg_temp_new();
TCGv t1 = tcg_temp_new();
MemOp memop = (extract32(ctx->opcode, 8, 3)) ==
NM_P_LS_UAWM ? MO_UNALN : 0;
NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN;
count = (count == 0) ? 8 : count;
while (counter != count) {

View file

@ -298,6 +298,11 @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags)
TCGv data = dest_gpr(dc, instr.b);
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
#ifdef CONFIG_USER_ONLY
flags |= MO_UNALN;
#else
flags |= MO_ALIGN;
#endif
tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags);
}
@ -309,6 +314,11 @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags)
TCGv addr = tcg_temp_new();
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
#ifdef CONFIG_USER_ONLY
flags |= MO_UNALN;
#else
flags |= MO_ALIGN;
#endif
tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags);
}

View file

@ -527,13 +527,15 @@ static void _decode_opc(DisasContext * ctx)
case 0x9000: /* mov.w @(disp,PC),Rn */
{
TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2);
tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
MO_TESW | MO_ALIGN);
}
return;
case 0xd000: /* mov.l @(disp,PC),Rn */
{
TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
MO_TESL | MO_ALIGN);
}
return;
case 0x7000: /* add #imm,Rn */
@ -801,9 +803,11 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv arg0, arg1;
arg0 = tcg_temp_new();
tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
MO_TESL | MO_ALIGN);
arg1 = tcg_temp_new();
tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
gen_helper_macl(cpu_env, arg0, arg1);
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
@ -813,9 +817,11 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv arg0, arg1;
arg0 = tcg_temp_new();
tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
MO_TESL | MO_ALIGN);
arg1 = tcg_temp_new();
tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
gen_helper_macw(cpu_env, arg0, arg1);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
@ -961,30 +967,36 @@ static void _decode_opc(DisasContext * ctx)
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
gen_load_fpr64(ctx, fp, XHACK(B7_4));
tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ);
tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx,
MO_TEUQ | MO_ALIGN);
} else {
tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx,
MO_TEUL | MO_ALIGN);
}
return;
case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
CHECK_FPU_ENABLED
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
MO_TEUQ | MO_ALIGN);
gen_store_fpr64(ctx, fp, XHACK(B11_8));
} else {
tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
MO_TEUL | MO_ALIGN);
}
return;
case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
CHECK_FPU_ENABLED
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
MO_TEUQ | MO_ALIGN);
gen_store_fpr64(ctx, fp, XHACK(B11_8));
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
} else {
tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
MO_TEUL | MO_ALIGN);
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
}
return;
@ -996,10 +1008,12 @@ static void _decode_opc(DisasContext * ctx)
TCGv_i64 fp = tcg_temp_new_i64();
gen_load_fpr64(ctx, fp, XHACK(B7_4));
tcg_gen_subi_i32(addr, REG(B11_8), 8);
tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
MO_TEUQ | MO_ALIGN);
} else {
tcg_gen_subi_i32(addr, REG(B11_8), 4);
tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
MO_TEUL | MO_ALIGN);
}
tcg_gen_mov_i32(REG(B11_8), addr);
}
@ -1011,10 +1025,12 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_add_i32(addr, REG(B7_4), REG(0));
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ);
tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx,
MO_TEUQ | MO_ALIGN);
gen_store_fpr64(ctx, fp, XHACK(B11_8));
} else {
tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx,
MO_TEUL | MO_ALIGN);
}
}
return;
@ -1026,9 +1042,11 @@ static void _decode_opc(DisasContext * ctx)
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
gen_load_fpr64(ctx, fp, XHACK(B7_4));
tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
MO_TEUQ | MO_ALIGN);
} else {
tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
MO_TEUL | MO_ALIGN);
}
}
return;
@ -1158,14 +1176,14 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN);
}
return;
case 0xc600: /* mov.l @(disp,GBR),R0 */
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN);
}
return;
case 0xc000: /* mov.b R0,@(disp,GBR) */
@ -1179,14 +1197,14 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN);
}
return;
case 0xc200: /* mov.l R0,@(disp,GBR) */
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN);
}
return;
case 0x8000: /* mov.b R0,@(disp,Rn) */
@ -1286,7 +1304,8 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x4087: /* ldc.l @Rm+,Rn_BANK */
CHECK_PRIVILEGED
tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
return;
case 0x0082: /* stc Rm_BANK,Rn */
@ -1298,7 +1317,8 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv addr = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx,
MO_TEUL | MO_ALIGN);
tcg_gen_mov_i32(REG(B11_8), addr);
}
return;
@ -1354,7 +1374,8 @@ static void _decode_opc(DisasContext * ctx)
CHECK_PRIVILEGED
{
TCGv val = tcg_temp_new();
tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
tcg_gen_andi_i32(val, val, 0x700083f3);
gen_write_sr(val);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
@ -1372,7 +1393,7 @@ static void _decode_opc(DisasContext * ctx)
TCGv val = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
gen_read_sr(val);
tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
tcg_gen_mov_i32(REG(B11_8), addr);
}
return;
@ -1383,7 +1404,8 @@ static void _decode_opc(DisasContext * ctx)
return; \
case ldpnum: \
prechk \
tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \
MO_TESL | MO_ALIGN); \
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); \
return;
#define ST(reg,stnum,stpnum,prechk) \
@ -1396,7 +1418,8 @@ static void _decode_opc(DisasContext * ctx)
{ \
TCGv addr = tcg_temp_new(); \
tcg_gen_subi_i32(addr, REG(B11_8), 4); \
tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \
MO_TEUL | MO_ALIGN); \
tcg_gen_mov_i32(REG(B11_8), addr); \
} \
return;
@ -1423,7 +1446,8 @@ static void _decode_opc(DisasContext * ctx)
CHECK_FPU_ENABLED
{
TCGv addr = tcg_temp_new();
tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
gen_helper_ld_fpscr(cpu_env, addr);
ctx->base.is_jmp = DISAS_STOP;
@ -1441,16 +1465,18 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
addr = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
tcg_gen_mov_i32(REG(B11_8), addr);
}
return;
case 0x00c3: /* movca.l R0,@Rm */
{
TCGv val = tcg_temp_new();
tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
MO_TEUL | MO_ALIGN);
gen_helper_movcal(cpu_env, REG(B11_8), val);
tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
MO_TEUL | MO_ALIGN);
}
ctx->has_movcal = 1;
return;
@ -1492,11 +1518,13 @@ static void _decode_opc(DisasContext * ctx)
cpu_lock_addr, fail);
tmp = tcg_temp_new();
tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
REG(0), ctx->memidx, MO_TEUL);
REG(0), ctx->memidx,
MO_TEUL | MO_ALIGN);
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
} else {
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
MO_TEUL | MO_ALIGN);
tcg_gen_movi_i32(cpu_sr_t, 1);
}
tcg_gen_br(done);
@ -1521,11 +1549,13 @@ static void _decode_opc(DisasContext * ctx)
if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
TCGv tmp = tcg_temp_new();
tcg_gen_mov_i32(tmp, REG(B11_8));
tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
tcg_gen_mov_i32(cpu_lock_value, REG(0));
tcg_gen_mov_i32(cpu_lock_addr, tmp);
} else {
tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
MO_TESL | MO_ALIGN);
tcg_gen_movi_i32(cpu_lock_addr, 0);
}
return;

View file

@ -1580,13 +1580,6 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
}
}
static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target);
tcg_debug_assert(offset == sextract64(offset, 0, 21));
tcg_out_insn(s, 3406, ADR, rd, offset);
}
typedef struct {
TCGReg base;
TCGReg index;
@ -1627,151 +1620,44 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
#endif
};
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_REG_TMP }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
MemOp opc = get_memop(lb->oi);
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
tcg_out_adr(s, TCG_REG_X3, lb->raddr);
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_movext(s, lb->type, lb->datalo_reg,
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tcg_out_goto(s, lb->raddr);
return true;
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
MemOp size = opc & MO_SIZE;
MemOp opc = get_memop(lb->oi);
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
tcg_out_adr(s, TCG_REG_X4, lb->raddr);
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_goto(s, lb->raddr);
return true;
}
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
TCGType ext, TCGReg data_reg, TCGReg addr_reg,
tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->oi = oi;
label->type = ext;
label->datalo_reg = data_reg;
label->addrlo_reg = addr_reg;
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr;
}
/* We expect to use a 7-bit scaled negative offset from ENV. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
/* These offsets are built into the LDP below. */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
/* Load and compare a TLB entry, emitting the conditional jump to the
slow path for the failure case, which will be patched later when finalizing
the slow path. Generated code returns the host addend in X1,
clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
tcg_insn_unit **label_ptr, int mem_index,
bool is_read)
{
unsigned a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1u << a_bits) - 1;
unsigned s_mask = (1u << s_bits) - 1;
TCGReg x3;
TCGType mask_type;
uint64_t compare_mask;
mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
/* Extract the TLB index from the address into X0. */
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
TCG_REG_X0, TCG_REG_X0, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
/* Load the tlb comparator into X0, and the fast path addend into X1. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
offsetof(CPUTLBEntry, addend));
/* For aligned accesses, we check the first byte and include the alignment
bits within the address. For unaligned access, we check that we don't
cross pages using the address of the last byte of the access. */
if (a_bits >= s_bits) {
x3 = addr_reg;
} else {
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
TCG_REG_X3, addr_reg, s_mask - a_mask);
x3 = TCG_REG_X3;
}
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
/* Store the page mask part of the address into X3. */
tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
TCG_REG_X3, x3, compare_mask);
/* Perform the address comparison. */
tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
/* If not equal, we jump to the slow path. */
*label_ptr = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
#else
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
unsigned a_bits)
static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
unsigned a_mask = (1 << a_bits) - 1;
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->addrlo_reg = addr_reg;
/* tst addr, #mask */
tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
label->label_ptr[0] = s->code_ptr;
/* b.ne slow_path */
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
label->raddr = tcg_splitwx_to_rx(s->code_ptr);
ptrdiff_t offset = tcg_pcrel_diff(s, target);
tcg_debug_assert(offset == sextract64(offset, 0, 21));
tcg_out_insn(s, 3406, ADR, rd, offset);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
@ -1801,6 +1687,125 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
#endif /* CONFIG_SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1u << a_bits) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1u << s_bits) - 1;
unsigned mem_index = get_mmuidx(oi);
TCGReg x3;
TCGType mask_type;
uint64_t compare_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
/* Extract the TLB index from the address into X0. */
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
TCG_REG_X0, TCG_REG_X0, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
/* Load the tlb comparator into X0, and the fast path addend into X1. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
offsetof(CPUTLBEntry, addend));
/*
* For aligned accesses, we check the first byte and include the alignment
* bits within the address. For unaligned access, we check that we don't
* cross pages using the address of the last byte of the access.
*/
if (a_bits >= s_bits) {
x3 = addr_reg;
} else {
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
TCG_REG_X3, addr_reg, s_mask - a_mask);
x3 = TCG_REG_X3;
}
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
/* Store the page mask part of the address into X3. */
tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
TCG_REG_X3, x3, compare_mask);
/* Perform the address comparison. */
tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
/* If not equal, we jump to the slow path. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
*h = (HostAddress){
.base = TCG_REG_X1,
.index = addr_reg,
.index_ext = addr_type
};
#else
if (a_mask) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
/* tst addr, #mask */
tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
/* b.ne slow_path */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
if (USE_GUEST_BASE) {
*h = (HostAddress){
.base = TCG_REG_GUEST_BASE,
.index = addr_reg,
.index_ext = addr_type
};
} else {
*h = (HostAddress){
.base = addr_reg,
.index = TCG_REG_XZR,
.index_ext = TCG_TYPE_I64
};
}
#endif
return ldst;
}
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
TCGReg data_r, HostAddress h)
{
@ -1857,93 +1862,33 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp memop = get_memop(oi);
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
TCGLabelQemuLdst *ldst;
HostAddress h;
/* Byte swapping is left to middle-end expansion. */
tcg_debug_assert((memop & MO_BSWAP) == 0);
ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
#ifdef CONFIG_SOFTMMU
tcg_insn_unit *label_ptr;
tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
h = (HostAddress){
.base = TCG_REG_X1,
.index = addr_reg,
.index_ext = addr_type
};
tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
unsigned a_bits = get_alignment_bits(memop);
if (a_bits) {
tcg_out_test_alignment(s, true, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
if (USE_GUEST_BASE) {
h = (HostAddress){
.base = TCG_REG_GUEST_BASE,
.index = addr_reg,
.index_ext = addr_type
};
} else {
h = (HostAddress){
.base = addr_reg,
.index = TCG_REG_XZR,
.index_ext = TCG_TYPE_I64
};
}
tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
#endif /* CONFIG_SOFTMMU */
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp memop = get_memop(oi);
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
TCGLabelQemuLdst *ldst;
HostAddress h;
/* Byte swapping is left to middle-end expansion. */
tcg_debug_assert((memop & MO_BSWAP) == 0);
ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
#ifdef CONFIG_SOFTMMU
tcg_insn_unit *label_ptr;
tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
h = (HostAddress){
.base = TCG_REG_X1,
.index = addr_reg,
.index_ext = addr_type
};
tcg_out_qemu_st_direct(s, memop, data_reg, h);
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
unsigned a_bits = get_alignment_bits(memop);
if (a_bits) {
tcg_out_test_alignment(s, false, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
if (USE_GUEST_BASE) {
h = (HostAddress){
.base = TCG_REG_GUEST_BASE,
.index = addr_reg,
.index_ext = addr_type
};
} else {
h = (HostAddress){
.base = addr_reg,
.index = TCG_REG_XZR,
.index_ext = TCG_TYPE_I64
};
}
tcg_out_qemu_st_direct(s, memop, data_reg, h);
#endif /* CONFIG_SOFTMMU */
}
static const tcg_insn_unit *tb_ret_addr;
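
The reorganisation above gives every load and store the same shape: one prepare_host_addr() call that may or may not register a slow path, one unconditional fast-path emission, then back-filling of the slow-path record. The compilable sketch below condenses that control flow with stub types and made-up values; it is not QEMU code.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { int base, index; } HostAddress;
typedef struct { int data_reg; const char *resume; } SlowPath;

static SlowPath slow_path_storage;

/* Stands in for prepare_host_addr(): emit the TLB compare (softmmu) or the
 * alignment test (user-only) and return a slow-path record only if needed. */
static SlowPath *prepare_host_addr_stub(HostAddress *h, int addr_reg,
                                        bool need_check)
{
    h->base = 1;                           /* say, the TLB addend register */
    h->index = addr_reg;
    return need_check ? &slow_path_storage : NULL;
}

static void emit_qemu_ld(int data_reg, int addr_reg, bool need_check)
{
    HostAddress h;
    SlowPath *ldst = prepare_host_addr_stub(&h, addr_reg, need_check);

    printf("fast path: r%d <- [r%d + r%d]\n", data_reg, h.base, h.index);

    if (ldst) {                            /* fill in what the slow path needs */
        ldst->data_reg = data_reg;
        ldst->resume = "after the fast path";
        printf("slow path registered for r%d, resumes %s\n",
               ldst->data_reg, ldst->resume);
    }
}

int main(void)
{
    emit_qemu_ld(3, 7, true);              /* softmmu, or alignment required */
    emit_qemu_ld(3, 7, false);             /* user-only with no alignment */
    return 0;
}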

View file

@ -690,8 +690,8 @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
}
static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
TCGReg rn, int imm8)
static void __attribute__((unused))
tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
{
tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}
@ -969,28 +969,16 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
}
static void __attribute__((unused))
tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
{
tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
}
static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
{
/* sxth */
tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
}
static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond,
TCGReg rd, TCGReg rn)
{
/* uxth */
tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
}
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
tcg_out_ext16u_cond(s, COND_AL, rd, rn);
/* uxth */
tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
}
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
@ -1382,82 +1370,121 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
#endif
};
/* Helper routines for marshalling helper function arguments into
* the correct registers and stack.
* argreg is where we want to put this argument, arg is the argument itself.
* Return value is the updated argreg ready for the next call.
* Note that argreg 0..3 is real registers, 4+ on stack.
*
* We provide routines for arguments which are: immediate, 32 bit
* value in register, 16 and 8 bit values in register (which must be zero
* extended before use) and 64 bit value in a lo:hi register pair.
*/
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
{ \
if (argreg < 4) { \
MOV_ARG(s, COND_AL, argreg, arg); \
} else { \
int ofs = (argreg - 4) * 4; \
EXT_ARG; \
tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
} \
return argreg + 1; \
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
return TCG_REG_R14;
}
DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
(tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
(tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond,
(tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
static const TCGLdstHelperParam ldst_helper_param = {
.ra_gen = ldst_ra_gen,
.ntmp = 1,
.tmp = { TCG_REG_TMP },
};
static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
TCGReg arglo, TCGReg arghi)
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
/* 64 bit arguments must go in even/odd register pairs
* and in 8-aligned stack slots.
*/
if (argreg & 1) {
argreg++;
MemOp opc = get_memop(lb->oi);
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
tcg_out_strd_8(s, COND_AL, arglo,
TCG_REG_CALL_STACK, (argreg - 4) * 4);
return argreg + 2;
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tcg_out_goto(s, COND_AL, lb->raddr);
return true;
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOp opc = get_memop(lb->oi);
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
/* Tail-call to the helper, which will return to the fast path. */
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
return true;
}
#else
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
if (TARGET_LONG_BITS == 64) {
/* 64-bit target address is aligned into R2:R3. */
TCGMovExtend ext[2] = {
{ .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
.src = l->addrlo_reg,
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
{ .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
.src = l->addrhi_reg,
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
};
tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
argreg = tcg_out_arg_reg32(s, argreg, arglo);
argreg = tcg_out_arg_reg32(s, argreg, arghi);
return argreg;
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
/*
* Tail call to the helper, with the return address back inline,
* just for the clarity of the debugging traceback -- the helper
* cannot return. We have used BLNE to arrive here, so LR is
* already set.
*/
tcg_out_goto(s, COND_AL, (const void *)
(l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
return true;
}
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
/* We expect to use a 9-bit sign-magnitude negative offset from ENV. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
/* These offsets are built into the LDRD below. */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
/* Load and compare a TLB entry, leaving the flags set. Returns the register
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
MemOp opc, int mem_index, bool is_load)
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* SOFTMMU */
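/*
 * Sketch only, not part of this patch: the common helper-argument code
 * driven by ldst_helper_param above consults the optional ra_gen hook so
 * a backend can hand back a register that already holds l->raddr (R14
 * here, courtesy of arriving via BLNE) instead of loading the constant.
 * The function name below is invented and the body is only conceptual.
 */
static TCGReg ra_arg_sketch(TCGContext *s, const TCGLabelQemuLdst *l,
                            const TCGLdstHelperParam *p, int preferred)
{
    if (p->ra_gen) {
        return p->ra_gen(s, l, preferred);   /* may ignore the preference */
    }
    tcg_out_movi(s, TCG_TYPE_PTR, preferred, (uintptr_t)l->raddr);
    return preferred;
}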
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp a_bits = get_alignment_bits(opc);
unsigned a_mask = (1 << a_bits) - 1;
#ifdef CONFIG_SOFTMMU
int mem_index = get_mmuidx(oi);
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
TCGReg t_addr;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
/* Extract the tlb index from the address into R0. */
@ -1527,182 +1554,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
}
return TCG_REG_R1;
}
/* Record the context of a call to the out of line helper code for the slow
path for a load or store, so that we can later generate the correct
helper code. */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
MemOpIdx oi, TCGType type,
TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
tcg_insn_unit *raddr,
tcg_insn_unit *label_ptr)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->oi = oi;
label->type = type;
label->datalo_reg = datalo;
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
TCGReg argreg;
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
if (TARGET_LONG_BITS == 64) {
argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
} else {
argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
}
argreg = tcg_out_arg_imm32(s, argreg, oi);
argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
/* Use the canonical unsigned helpers and minimize icache usage. */
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
if ((opc & MO_SIZE) == MO_64) {
TCGMovExtend ext[2] = {
{ .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
.src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
{ .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
.src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
};
tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
}
tcg_out_goto(s, COND_AL, lb->raddr);
return true;
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
TCGReg argreg, datalo, datahi;
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
argreg = TCG_REG_R0;
argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
if (TARGET_LONG_BITS == 64) {
argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
} else {
argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
}
datalo = lb->datalo_reg;
datahi = lb->datahi_reg;
switch (opc & MO_SIZE) {
case MO_8:
argreg = tcg_out_arg_reg8(s, argreg, datalo);
break;
case MO_16:
argreg = tcg_out_arg_reg16(s, argreg, datalo);
break;
case MO_32:
default:
argreg = tcg_out_arg_reg32(s, argreg, datalo);
break;
case MO_64:
argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
break;
}
argreg = tcg_out_arg_imm32(s, argreg, oi);
argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
/* Tail-call to the helper, which will return to the fast path. */
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
return true;
}
*h = (HostAddress){
.cond = COND_AL,
.base = addrlo,
.index = TCG_REG_R1,
.index_scratch = true,
};
#else
if (a_mask) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
TCGReg addrhi, unsigned a_bits)
{
unsigned a_mask = (1 << a_bits) - 1;
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
/* We are expecting a_bits to max out at 7, and can easily support 8. */
tcg_debug_assert(a_mask <= 0xff);
/* tst addr, #mask */
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
/* blne slow_path */
label->label_ptr[0] = s->code_ptr;
tcg_out_bl_imm(s, COND_NE, 0);
label->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
/* We are expecting a_bits to max out at 7 */
tcg_debug_assert(a_mask <= 0xff);
/* tst addr, #mask */
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
}
if (TARGET_LONG_BITS == 64) {
/* 64-bit target address is aligned into R2:R3. */
TCGMovExtend ext[2] = {
{ .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
.src = l->addrlo_reg,
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
{ .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
.src = l->addrhi_reg,
.src_type = TCG_TYPE_I32, .src_ext = MO_UL },
};
tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
*h = (HostAddress){
.cond = COND_AL,
.base = addrlo,
.index = guest_base ? TCG_REG_GUEST_BASE : -1,
.index_scratch = false,
};
#endif
/*
* Tail call to the helper, with the return address back inline,
* just for the clarity of the debugging traceback -- the helper
* cannot return. We have used BLNE to arrive here, so LR is
* already set.
*/
tcg_out_goto(s, COND_AL, (const void *)
(l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
return true;
return ldst;
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* SOFTMMU */
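/*
 * Plain-C rendering of the user-only alignment test emitted by
 * prepare_host_addr above ("tst addr, #mask" followed by a conditional
 * BLNE into the slow path).  Sketch only; the helper name is invented.
 */
static inline bool addr_is_misaligned(uint32_t addr, unsigned a_bits)
{
    return a_bits != 0 && (addr & ((1u << a_bits) - 1)) != 0;
}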
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
TCGReg datahi, HostAddress h)
{
@ -1799,37 +1681,28 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
h.cond = COND_AL;
h.base = addrlo;
h.index_scratch = true;
h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
/*
* This is a conditional BL only to load a pointer within this opcode into
* LR for the slow path. We will not be using the value for a tail call.
*/
tcg_insn_unit *label_ptr = s->code_ptr;
tcg_out_bl_imm(s, COND_NE, 0);
/*
* This is a conditional BL only to load a pointer within this
* opcode into LR for the slow path. We will not be using
* the value for a tail call.
*/
ldst->label_ptr[0] = s->code_ptr;
tcg_out_bl_imm(s, COND_NE, 0);
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
} else {
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
}
h.cond = COND_AL;
h.base = addrlo;
h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
h.index_scratch = false;
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
@ -1891,35 +1764,25 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
h.cond = COND_EQ;
h.base = addrlo;
h.index_scratch = true;
h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
/* The conditional call must come last, as we're going to return here. */
tcg_insn_unit *label_ptr = s->code_ptr;
tcg_out_bl_imm(s, COND_NE, 0);
add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
h.cond = COND_AL;
if (a_bits) {
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
h.cond = COND_EQ;
}
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
h.base = addrlo;
h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
h.index_scratch = false;
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
#endif
/* The conditional call is last, as we're going to return here. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_bl_imm(s, COND_NE, 0);
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
} else {
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
}
}
static void tcg_out_epilogue(TCGContext *s);


@ -1802,142 +1802,37 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_BEUQ] = helper_be_stq_mmu,
};
/* Perform the TLB load and compare.
Inputs:
ADDRLO and ADDRHI contain the low and high part of the address.
MEM_INDEX and S_BITS are the memory context and log2 size of the load.
WHICH is the offset into the CPUTLBEntry structure of the slot to read.
This should be offsetof addr_read or addr_write.
Outputs:
LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
positions of the displacements of forward jumps to the TLB miss case.
Second argument register is loaded with the low part of the address.
In the TLB hit case, it has been adjusted as indicated by the TLB
and so is a host address. In the TLB miss case, it continues to
hold a guest address.
First argument register is clobbered. */
static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
int mem_index, MemOp opc,
tcg_insn_unit **label_ptr, int which)
{
TCGType ttype = TCG_TYPE_I32;
TCGType tlbtype = TCG_TYPE_I32;
int trexw = 0, hrexw = 0, tlbrexw = 0;
unsigned a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1 << a_bits) - 1;
unsigned s_mask = (1 << s_bits) - 1;
target_ulong tlb_mask;
if (TCG_TARGET_REG_BITS == 64) {
if (TARGET_LONG_BITS == 64) {
ttype = TCG_TYPE_I64;
trexw = P_REXW;
}
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
tlbtype = TCG_TYPE_I64;
tlbrexw = P_REXW;
}
}
}
tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
offsetof(CPUTLBDescFast, mask));
tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
offsetof(CPUTLBDescFast, table));
/* If the required alignment is at least as large as the access, simply
copy the address and mask. For lesser alignments, check that we don't
cross pages for the complete access. */
if (a_bits >= s_bits) {
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
} else {
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
addrlo, s_mask - a_mask);
}
tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
TCG_REG_L1, TCG_REG_L0, which);
/* Prepare for both the fast path add of the tlb addend, and the slow
path function argument setup. */
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
label_ptr[0] = s->code_ptr;
s->code_ptr += 4;
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
/* cmp 4(TCG_REG_L0), addrhi */
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
label_ptr[1] = s->code_ptr;
s->code_ptr += 4;
}
/* TLB Hit. */
/* add addend(TCG_REG_L0), TCG_REG_L1 */
tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
offsetof(CPUTLBEntry, addend));
}
/*
* Record the context of a call to the out of line helper code for the slow path
* for a load or store, so that we can later generate the correct helper code
* Because i686 has no register parameters and because x86_64 has xchg
* to handle addr/data register overlap, we have placed all input arguments
* before we might need a scratch reg.
*
* Even then, a scratch is only needed for l->raddr. Rather than expose
* a general-purpose scratch when we don't actually know it's available,
* use the ra_gen hook to load into RAX if needed.
*/
static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
TCGType type, MemOpIdx oi,
TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
tcg_insn_unit *raddr,
tcg_insn_unit **label_ptr)
#if TCG_TARGET_REG_BITS == 64
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->oi = oi;
label->type = type;
label->datalo_reg = datalo;
label->datahi_reg = datahi;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr[0];
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
label->label_ptr[1] = label_ptr[1];
if (arg < 0) {
arg = TCG_REG_RAX;
}
tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
return arg;
}
static const TCGLdstHelperParam ldst_helper_param = {
.ra_gen = ldst_ra_gen
};
#else
static const TCGLdstHelperParam ldst_helper_param = { };
#endif
/*
* Generate code for the slow path for a load at the end of block
*/
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp opc = get_memop(l->oi);
tcg_insn_unit **label_ptr = &l->label_ptr[0];
/* resolve label address */
@ -1946,48 +1841,10 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
}
if (TCG_TARGET_REG_BITS == 32) {
int ofs = 0;
tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
ofs += 4;
tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
ofs += 4;
if (TARGET_LONG_BITS == 64) {
tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
ofs += 4;
}
tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
ofs += 4;
tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
} else {
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
/* The second argument is already loaded with addrlo. */
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
(uintptr_t)l->raddr);
}
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
TCGMovExtend ext[2] = {
{ .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
.src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
{ .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
.src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
};
tcg_out_movext2(s, &ext[0], &ext[1], -1);
} else {
tcg_out_movext(s, l->type, l->datalo_reg,
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
}
/* Jump to the code corresponding to next IR of qemu_ld */
tcg_out_jmp(s, l->raddr);
return true;
}
@ -1997,11 +1854,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
*/
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
MemOp opc = get_memop(l->oi);
tcg_insn_unit **label_ptr = &l->label_ptr[0];
TCGReg retaddr;
/* resolve label address */
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
@ -2009,79 +1863,13 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
}
if (TCG_TARGET_REG_BITS == 32) {
int ofs = 0;
tcg_out_st_helper_args(s, l, &ldst_helper_param);
tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
ofs += 4;
tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
ofs += 4;
if (TARGET_LONG_BITS == 64) {
tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
ofs += 4;
}
tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
ofs += 4;
if (s_bits == MO_64) {
tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
ofs += 4;
}
tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
ofs += 4;
retaddr = TCG_REG_EAX;
tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
} else {
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
/* The second argument is already loaded with addrlo. */
tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
tcg_target_call_iarg_regs[2], l->datalo_reg);
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
retaddr = tcg_target_call_iarg_regs[4];
tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
} else {
retaddr = TCG_REG_RAX;
tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
TCG_TARGET_CALL_STACK_OFFSET);
}
}
/* "Tail call" to the helper, with the return address back inline. */
tcg_out_push(s, retaddr);
tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_jmp(s, l->raddr);
return true;
}
#else
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
TCGReg addrhi, unsigned a_bits)
{
unsigned a_mask = (1 << a_bits) - 1;
TCGLabelQemuLdst *label;
tcg_out_testi(s, addrlo, a_mask);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
label = new_ldst_label(s);
label->is_ld = is_ld;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4);
label->label_ptr[0] = s->code_ptr;
s->code_ptr += 4;
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@ -2159,6 +1947,128 @@ static inline int setup_guest_base_seg(void)
#endif /* setup_guest_base_seg */
#endif /* SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1 << a_bits) - 1;
#ifdef CONFIG_SOFTMMU
int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write);
TCGType ttype = TCG_TYPE_I32;
TCGType tlbtype = TCG_TYPE_I32;
int trexw = 0, hrexw = 0, tlbrexw = 0;
unsigned mem_index = get_mmuidx(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
target_ulong tlb_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
if (TCG_TARGET_REG_BITS == 64) {
if (TARGET_LONG_BITS == 64) {
ttype = TCG_TYPE_I64;
trexw = P_REXW;
}
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
tlbtype = TCG_TYPE_I64;
tlbrexw = P_REXW;
}
}
}
tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
offsetof(CPUTLBDescFast, mask));
tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
offsetof(CPUTLBDescFast, table));
/*
* If the required alignment is at least as large as the access, simply
* copy the address and mask. For lesser alignments, check that we don't
* cross pages for the complete access.
*/
if (a_bits >= s_bits) {
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
} else {
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
addrlo, s_mask - a_mask);
}
tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
TCG_REG_L1, TCG_REG_L0, cmp_ofs);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
ldst->label_ptr[0] = s->code_ptr;
s->code_ptr += 4;
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
/* cmp 4(TCG_REG_L0), addrhi */
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
ldst->label_ptr[1] = s->code_ptr;
s->code_ptr += 4;
}
/* TLB Hit. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
offsetof(CPUTLBEntry, addend));
*h = (HostAddress) {
.base = addrlo,
.index = TCG_REG_L0,
};
#else
if (a_bits) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
tcg_out_testi(s, addrlo, a_mask);
/* jne slow_path */
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
ldst->label_ptr[0] = s->code_ptr;
s->code_ptr += 4;
}
*h = x86_guest_base;
h->base = addrlo;
#endif
return ldst;
}
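/*
 * Reference sketch, not part of this patch: a plain-C equivalent of the
 * softmmu fast path emitted above, assuming a 64-bit guest address for
 * brevity.  The function name is invented; where this returns NULL the
 * generated code instead branches to the slow-path label.
 */
static void *tlb_fast_path_sketch(CPUTLBDescFast *fast, uint64_t addr,
                                  uint64_t page_align_mask, int cmp_ofs)
{
    /* Scale the page number to the entry size, then mask to the table. */
    uintptr_t ofs = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
                    & fast->mask;
    CPUTLBEntry *ent = (CPUTLBEntry *)(fast->table + ofs);
    uint64_t cmp = *(uint64_t *)((uintptr_t)ent + cmp_ofs);

    if ((addr & page_align_mask) != cmp) {
        return NULL;                                 /* TLB miss */
    }
    return (void *)(uintptr_t)(addr + ent->addend);  /* TLB hit */
}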
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
HostAddress h, TCGType type, MemOp memop)
{
@ -2258,35 +2168,18 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[2];
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
h.base = TCG_REG_L1;
h.index = -1;
h.ofs = 0;
h.seg = 0;
tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
/* Record the current context of a load into ldst label */
add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
h = x86_guest_base;
h.base = addrlo;
tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
@ -2345,36 +2238,18 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[2];
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
h.base = TCG_REG_L1;
h.index = -1;
h.ofs = 0;
h.seg = 0;
tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
/* Record the current context of a store into ldst label */
add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
h = x86_guest_base;
h.base = addrlo;
tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
#endif
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)


@ -17,9 +17,7 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
C_O0_I2(LZ, L)
C_O1_I1(r, r)
C_O1_I1(r, L)
C_O1_I2(r, r, rC)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)


@ -14,7 +14,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
/*
* Define constraint letters for constants:


@ -133,18 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define TCG_CT_CONST_C12 0x1000
#define TCG_CT_CONST_WSZ 0x2000
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
/*
* For softmmu, we need to avoid conflicts with the first 5
* argument registers to call the helper. Some of these are
* also used for the tlb lookup.
*/
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
{
@ -818,156 +807,45 @@ static void * const qemu_st_helpers[4] = {
[MO_64] = helper_le_stq_mmu,
};
/* We expect to use a 12-bit negative offset from ENV. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
tcg_out_opc_b(s, 0);
return reloc_br_sd10k16(s->code_ptr - 1, target);
}
/*
* Emits common code for TLB addend lookup, that eventually loads the
* addend in TCG_REG_TMP2.
*/
static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi,
tcg_insn_unit **label_ptr, bool is_load)
{
MemOp opc = get_memop(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
tcg_target_long compare_mask;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
offsetof(CPUTLBEntry, addend));
/* We don't support unaligned accesses. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
/* Clear the non-page, non-alignment bits from the address. */
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
/* Compare masked address with the TLB entry. */
label_ptr[0] = s->code_ptr;
tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
/* TLB Hit - addend in TCG_REG_TMP2, ready for use. */
}
static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
TCGType type,
TCGReg datalo, TCGReg addrlo,
void *raddr, tcg_insn_unit **label_ptr)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->oi = oi;
label->type = type;
label->datalo_reg = datalo;
label->datahi_reg = 0; /* unused */
label->addrlo_reg = addrlo;
label->addrhi_reg = 0; /* unused */
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr[0];
}
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_REG_TMP0 }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp size = opc & MO_SIZE;
MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
/* call load helper */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
tcg_out_call_int(s, qemu_ld_helpers[size], false);
tcg_out_movext(s, l->type, l->datalo_reg,
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0);
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
return tcg_out_goto(s, l->raddr);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp size = opc & MO_SIZE;
MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
/* call store helper */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, TCG_REG_A2,
l->type, size, l->datalo_reg);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
tcg_out_call_int(s, qemu_st_helpers[size], false);
tcg_out_st_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
return tcg_out_goto(s, l->raddr);
}
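/*
 * Sketch, not part of this patch: the tcg_out_ld_helper_ret step above
 * amounts to extending the helper's raw return value according to
 * MO_SSIZE, as in this plain-C helper (name invented):
 */
static uint64_t extend_helper_ret_sketch(uint64_t raw, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_UB: return (uint8_t)raw;
    case MO_SB: return (uint64_t)(int8_t)raw;
    case MO_UW: return (uint16_t)raw;
    case MO_SW: return (uint64_t)(int16_t)raw;
    case MO_UL: return (uint32_t)raw;
    case MO_SL: return (uint64_t)(int32_t)raw;
    default:    return raw;                  /* MO_UQ: already full width */
    }
}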
#else
/*
* Alignment helpers for user-mode emulation
*/
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
unsigned a_bits)
{
TCGLabelQemuLdst *l = new_ldst_label(s);
l->is_ld = is_ld;
l->addrlo_reg = addr_reg;
/*
* Without micro-architecture details, we don't know which of bstrpick or
* andi is faster, so use bstrpick as it's not constrained by imm field
* width. (Not to say alignments >= 2^12 are going to happen any time
* soon, though)
*/
tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
l->label_ptr[0] = s->code_ptr;
tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
l->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@ -997,27 +875,102 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
#endif /* CONFIG_SOFTMMU */
/*
* `ext32u` the address register into the temp register given,
* if target is 32-bit, no-op otherwise.
*
* Returns the address register ready for use with TLB addend.
*/
static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
TCGReg addr, TCGReg tmp)
{
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, tmp, addr);
return tmp;
}
return addr;
}
typedef struct {
TCGReg base;
TCGReg index;
} HostAddress;
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
tcg_target_long compare_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
offsetof(CPUTLBEntry, addend));
/* We don't support unaligned accesses. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
/* Clear the non-page, non-alignment bits from the address. */
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
/* Compare masked address with the TLB entry. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
h->index = TCG_REG_TMP2;
#else
if (a_bits) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
/*
* Without micro-architecture details, we don't know which of
* bstrpick or andi is faster, so use bstrpick as it's not
* constrained by imm field width. Not to say alignments >= 2^12
* are going to happen any time soon.
*/
tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
ldst->label_ptr[0] = s->code_ptr;
tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
}
h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
#endif
if (TARGET_LONG_BITS == 32) {
h->base = TCG_REG_TMP0;
tcg_out_ext32u(s, h->base, addr_reg);
} else {
h->base = addr_reg;
}
return ldst;
}
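/*
 * Sketch, not part of this patch: the HostAddress filled in above is
 * consumed as "base + index", where base is the guest address (zero-
 * extended for a 32-bit guest) and index is either the TLB addend
 * (softmmu) or the guest-base register / zero (user-only).  The helper
 * name is invented.
 */
static inline uintptr_t host_addr_sketch(uintptr_t index, uint64_t guest_addr,
                                         bool guest_is_32bit)
{
    uint64_t base = guest_is_32bit ? (uint32_t)guest_addr : guest_addr;
    return index + (uintptr_t)base;
}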
static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
TCGReg rd, HostAddress h)
{
@ -1057,29 +1010,17 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
tcg_insn_unit *label_ptr[1];
ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);
tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
h.index = TCG_REG_TMP2;
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, true, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
#endif
h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h);
#ifdef CONFIG_SOFTMMU
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#endif
}
static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
@ -1109,29 +1050,17 @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
tcg_insn_unit *label_ptr[1];
ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h);
tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
h.index = TCG_REG_TMP2;
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, false, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
#endif
h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
tcg_out_qemu_st_indexed(s, opc, data_reg, h);
#ifdef CONFIG_SOFTMMU
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#endif
}
/*
@ -1601,16 +1530,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_st32_i64:
case INDEX_op_st_i32:
case INDEX_op_st_i64:
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
return C_O0_I2(rZ, r);
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
return C_O0_I2(rZ, rZ);
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
return C_O0_I2(LZ, L);
case INDEX_op_ext8s_i32:
case INDEX_op_ext8s_i64:
case INDEX_op_ext8u_i32:
@ -1646,11 +1573,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_ld32u_i64:
case INDEX_op_ld_i32:
case INDEX_op_ld_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
return C_O1_I1(r, L);
return C_O1_I1(r, r);
case INDEX_op_andc_i32:
case INDEX_op_andc_i64:


@ -12,15 +12,13 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
C_O0_I2(SZ, S)
C_O0_I3(SZ, S, S)
C_O0_I3(SZ, SZ, S)
C_O0_I3(rZ, r, r)
C_O0_I3(rZ, rZ, r)
C_O0_I4(rZ, rZ, rZ, rZ)
C_O0_I4(SZ, SZ, S, S)
C_O1_I1(r, L)
C_O0_I4(rZ, rZ, r, r)
C_O1_I1(r, r)
C_O1_I2(r, 0, rZ)
C_O1_I2(r, L, L)
C_O1_I2(r, r, r)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
C_O1_I2(r, r, rIK)
@ -30,7 +28,6 @@ C_O1_I2(r, rZ, rN)
C_O1_I2(r, rZ, rZ)
C_O1_I4(r, rZ, rZ, rZ, 0)
C_O1_I4(r, rZ, rZ, rZ, rZ)
C_O2_I1(r, r, L)
C_O2_I2(r, r, L, L)
C_O2_I1(r, r, r)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, rZ, rZ, rN, rN)


@ -9,8 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('L', ALL_QLOAD_REGS)
REGS('S', ALL_QSTORE_REGS)
/*
* Define constraint letters for constants:

File diff suppressed because it is too large.


@ -204,8 +204,8 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
#endif
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#define TCG_TARGET_DEFAULT_MO 0
#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#define TCG_TARGET_NEED_LDST_LABELS
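/*
 * Sketch, not part of this patch: with TCG_TARGET_HAS_MEMORY_BSWAP now 0,
 * a cross-endian guest access is expanded by common code into a plain
 * load/store plus a separate byte-swap op, conceptually:
 */
static inline uint32_t bswap32_sketch(uint32_t v)
{
    return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8)
         | ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
}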


@ -12,18 +12,15 @@
C_O0_I1(r)
C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(S, S)
C_O0_I2(v, r)
C_O0_I3(S, S, S)
C_O0_I3(r, r, r)
C_O0_I4(r, r, ri, ri)
C_O0_I4(S, S, S, S)
C_O1_I1(r, L)
C_O0_I4(r, r, r, r)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)
C_O1_I1(v, vr)
C_O1_I2(r, 0, rZ)
C_O1_I2(r, L, L)
C_O1_I2(r, rI, ri)
C_O1_I2(r, rI, rT)
C_O1_I2(r, r, r)
@ -36,7 +33,7 @@ C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rZ, rZ)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(L, L, L)
C_O2_I2(L, L, L, L)
C_O2_I1(r, r, r)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, rI, rZM, r, r)
C_O2_I4(r, r, r, r, rI, rZM)


@ -10,19 +10,12 @@
*/
REGS('r', ALL_GENERAL_REGS)
REGS('v', ALL_VECTOR_REGS)
REGS('A', 1u << TCG_REG_R3)
REGS('B', 1u << TCG_REG_R4)
REGS('C', 1u << TCG_REG_R5)
REGS('D', 1u << TCG_REG_R6)
REGS('L', ALL_QLOAD_REGS)
REGS('S', ALL_QSTORE_REGS)
/*
* Define constraint letters for constants:
* CONST(letter, TCG_CT_CONST_* bit set)
*/
CONST('I', TCG_CT_CONST_S16)
CONST('J', TCG_CT_CONST_U16)
CONST('M', TCG_CT_CONST_MONE)
CONST('T', TCG_CT_CONST_S32)
CONST('U', TCG_CT_CONST_U32)


@ -68,6 +68,7 @@
#else
# define TCG_REG_TMP1 TCG_REG_R12
#endif
#define TCG_REG_TMP2 TCG_REG_R11
#define TCG_VEC_TMP1 TCG_REG_V0
#define TCG_VEC_TMP2 TCG_REG_V1
@ -82,7 +83,6 @@
#define SZR (TCG_TARGET_REG_BITS / 8)
#define TCG_CT_CONST_S16 0x100
#define TCG_CT_CONST_U16 0x200
#define TCG_CT_CONST_S32 0x400
#define TCG_CT_CONST_U32 0x800
#define TCG_CT_CONST_ZERO 0x1000
@ -92,18 +92,6 @@
#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS 0xffffffff00000000ull
#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
(ALL_GENERAL_REGS & \
~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
#define ALL_QSTORE_REGS \
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
(1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
#else
#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
#define ALL_QSTORE_REGS ALL_QLOAD_REGS
#endif
TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
@ -281,8 +269,6 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
return 1;
} else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
return 1;
} else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
return 1;
} else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
@ -2003,178 +1989,36 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_BEUQ] = helper_be_stq_mmu,
};
/* We expect to use a 16-bit negative offset from ENV. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
/* Perform the TLB load and compare. Places the result of the comparison
in CR7, loads the addend of the TLB into R3, and returns the register
containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
TCGReg addrlo, TCGReg addrhi,
int mem_index, bool is_read)
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
int cmp_off
= (is_read
? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
/* Extract the page index, shifted into place for tlb index. */
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
if (arg < 0) {
arg = TCG_REG_TMP1;
}
tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
/* Load the TLB comparator. */
if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
? LWZUX : LDUX);
tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
} else {
tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
} else {
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
}
}
/* Load the TLB addend for use on the fast path. Do this asap
to minimize any load use delay. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
offsetof(CPUTLBEntry, addend));
/* Clear the non-page, non-alignment bits from the address */
if (TCG_TARGET_REG_BITS == 32) {
/* We don't support unaligned accesses on 32-bits.
* Preserve the bottom bits and thus trigger a comparison
* failure on unaligned accesses.
*/
if (a_bits < s_bits) {
a_bits = s_bits;
}
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else {
TCGReg t = addrlo;
/* If the access is unaligned, we need to make sure we fail if we
* cross a page boundary. The trick is to add the access size-1
* to the address before masking the low bits. That will make the
* address overflow to the next page if we cross a page boundary,
* which will then force a mismatch of the TLB compare.
*/
if (a_bits < s_bits) {
unsigned a_mask = (1 << a_bits) - 1;
unsigned s_mask = (1 << s_bits) - 1;
tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
t = TCG_REG_R0;
}
/* Mask the address for the requested alignment. */
if (TARGET_LONG_BITS == 32) {
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
/* Zero-extend the address for use in the final address. */
tcg_out_ext32u(s, TCG_REG_R4, addrlo);
addrlo = TCG_REG_R4;
} else if (a_bits == 0) {
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
} else {
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
}
}
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
0, 7, TCG_TYPE_I32);
tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
} else {
tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
0, 7, TCG_TYPE_TL);
}
return addrlo;
tcg_out32(s, MFSPR | RT(arg) | LR);
return arg;
}
/* Record the context of a call to the out of line helper code for the slow
path for a load or store, so that we can later generate the correct
helper code. */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
TCGType type, MemOpIdx oi,
TCGReg datalo_reg, TCGReg datahi_reg,
TCGReg addrlo_reg, TCGReg addrhi_reg,
tcg_insn_unit *raddr, tcg_insn_unit *lptr)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->type = type;
label->oi = oi;
label->datalo_reg = datalo_reg;
label->datahi_reg = datahi_reg;
label->addrlo_reg = addrlo_reg;
label->addrhi_reg = addrhi_reg;
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = lptr;
}
/*
* For the purposes of ppc32 sorting 4 input registers into 4 argument
* registers, there is an outside chance we would require 3 temps.
*/
static const TCGLdstHelperParam ldst_helper_param = {
.ra_gen = ldst_ra_gen,
.ntmp = 3,
.tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
TCGReg hi, lo, arg = TCG_REG_R3;
MemOp opc = get_memop(lb->oi);
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
lo = lb->addrlo_reg;
hi = lb->addrhi_reg;
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
} else {
/* If the address needed to be zero-extended, we'll have already
placed it in R4. The only remaining case is 64-bit guest. */
tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
}
tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
tcg_out32(s, MFSPR | RT(arg) | LR);
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
lo = lb->datalo_reg;
hi = lb->datahi_reg;
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
} else {
tcg_out_movext(s, lb->type, lo,
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3);
}
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tcg_out_b(s, 0, lb->raddr);
return true;
@ -2182,70 +2026,19 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
TCGReg hi, lo, arg = TCG_REG_R3;
MemOp opc = get_memop(lb->oi);
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
lo = lb->addrlo_reg;
hi = lb->addrhi_reg;
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
} else {
/* If the address needed to be zero-extended, we'll have already
placed it in R4. The only remaining case is 64-bit guest. */
tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
}
lo = lb->datalo_reg;
hi = lb->datahi_reg;
if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
} else {
tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
arg++, lb->type, s_bits, lo);
}
tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
tcg_out32(s, MFSPR | RT(arg) | LR);
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_b(s, 0, lb->raddr);
return true;
}
#else
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
TCGReg addrhi, unsigned a_bits)
{
unsigned a_mask = (1 << a_bits) - 1;
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->addrlo_reg = addrlo;
label->addrhi_reg = addrhi;
/* We are expecting a_bits to max out at 7, much lower than ANDI. */
tcg_debug_assert(a_bits < 16);
tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
label->label_ptr[0] = s->code_ptr;
tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
label->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@ -2294,37 +2087,187 @@ typedef struct {
TCGReg index;
} HostAddress;
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
#ifdef CONFIG_SOFTMMU
int mem_index = get_mmuidx(oi);
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
unsigned s_bits = opc & MO_SIZE;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
/* Extract the page index, shifted into place for tlb index. */
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_shri32(s, TCG_REG_R0, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_shri64(s, TCG_REG_R0, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
}
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
/* Load the (low part) TLB comparator into TMP2. */
if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
? LWZUX : LDUX);
tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
} else {
tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2,
TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN);
} else {
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
}
}
/*
* Load the TLB addend for use on the fast path.
* Do this asap to minimize any load use delay.
*/
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
offsetof(CPUTLBEntry, addend));
}
/* Clear the non-page, non-alignment bits from the address in R0. */
if (TCG_TARGET_REG_BITS == 32) {
/*
* We don't support unaligned accesses on 32-bits.
* Preserve the bottom bits and thus trigger a comparison
* failure on unaligned accesses.
*/
if (a_bits < s_bits) {
a_bits = s_bits;
}
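/*
 * Editor's note (illustrative, not part of this patch): with a_bits
 * raised to s_bits, a 4-byte access at an address ending in ...2 keeps
 * bits 1:0 = 2 in R0 after the rotate-and-mask below; that value can
 * never equal the TLB comparator, whose low bits are clear, so the
 * misaligned access falls through to the slow path.
 */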
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else {
TCGReg t = addrlo;
/*
* If the access is unaligned, we need to make sure we fail if we
* cross a page boundary. The trick is to add the access size-1
* to the address before masking the low bits. That will make the
* address overflow to the next page if we cross a page boundary,
* which will then force a mismatch of the TLB compare.
*/
if (a_bits < s_bits) {
unsigned a_mask = (1 << a_bits) - 1;
unsigned s_mask = (1 << s_bits) - 1;
tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
t = TCG_REG_R0;
}
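/*
 * Editor's note (illustrative, not part of this patch): assuming 4 KiB
 * pages, an unaligned 4-byte access (s_bits = 2, a_bits = 0) at address
 * 0x1ffe has s_mask - a_mask = 3 added first, giving 0x2001.  Masking
 * then yields page 0x2000 rather than 0x1000, so the TLB compare fails
 * and the page-crossing access is diverted to the slow path.
 */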
/* Mask the address for the requested alignment. */
if (TARGET_LONG_BITS == 32) {
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else if (a_bits == 0) {
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
} else {
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
}
}
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
/* Low part comparison into cr7. */
tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
0, 7, TCG_TYPE_I32);
/* Load the high part TLB comparator into TMP2. */
tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
cmp_off + 4 * !HOST_BIG_ENDIAN);
/* Load addend, deferred for this case. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
offsetof(CPUTLBEntry, addend));
/* High part comparison into cr6. */
tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
/* Combine comparisons into cr7. */
tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
} else {
/* Full comparison into cr7. */
tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
0, 7, TCG_TYPE_TL);
}
/* Load a pointer into the current opcode w/conditional branch-link. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
h->base = TCG_REG_TMP1;
#else
if (a_bits) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
ldst->addrhi_reg = addrhi;
/* We are expecting a_bits to max out at 7, much lower than ANDI. */
tcg_debug_assert(a_bits < 16);
tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
ldst->label_ptr[0] = s->code_ptr;
tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
}
h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
#endif
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
/* Zero-extend the guest address for use in the host address. */
tcg_out_ext32u(s, TCG_REG_R0, addrlo);
h->index = TCG_REG_R0;
} else {
h->index = addrlo;
}
return ldst;
}
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
tcg_insn_unit *label_ptr;
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
h.base = TCG_REG_R3;
/* Load a pointer into the current opcode w/conditional branch-link. */
label_ptr = s->code_ptr;
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
#else /* !CONFIG_SOFTMMU */
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
}
h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
h.index = addrlo;
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
h.index = TCG_REG_TMP1;
}
#endif
if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
if (opc & MO_BSWAP) {
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
@@ -2357,10 +2300,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
}
#ifdef CONFIG_SOFTMMU
add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
#endif
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -2368,32 +2313,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
tcg_insn_unit *label_ptr;
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
h.base = TCG_REG_R3;
/* Load a pointer into the current opcode w/conditional branch-link. */
label_ptr = s->code_ptr;
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
#else /* !CONFIG_SOFTMMU */
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
}
h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
h.index = addrlo;
if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
h.index = TCG_REG_TMP1;
}
#endif
if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
if (opc & MO_BSWAP) {
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
@@ -2418,10 +2343,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
}
#ifdef CONFIG_SOFTMMU
add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
addrlo, addrhi, s->code_ptr, label_ptr);
#endif
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
@@ -3812,23 +3739,23 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
? C_O1_I1(r, L)
: C_O1_I2(r, L, L));
? C_O1_I1(r, r)
: C_O1_I2(r, r, r));
case INDEX_op_qemu_st_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
? C_O0_I2(S, S)
: C_O0_I3(S, S, S));
? C_O0_I2(r, r)
: C_O0_I3(r, r, r));
case INDEX_op_qemu_ld_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
: TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
: C_O2_I2(L, L, L, L));
return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
: TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
: C_O2_I2(r, r, r, r));
case INDEX_op_qemu_st_i64:
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
: TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
: C_O0_I4(S, S, S, S));
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
: C_O0_I4(r, r, r, r));
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
@@ -3978,7 +3905,8 @@ static void tcg_target_init(TCGContext *s)
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
if (USE_REG_TB) {


@@ -10,10 +8,8 @@
* tcg-target-con-str.h; the constraint combination is inclusive or.
*/
C_O0_I1(r)
C_O0_I2(LZ, L)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
C_O1_I1(r, L)
C_O1_I1(r, r)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)


@@ -9,7 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
/*
* Define constraint letters for constants:


@@ -125,17 +125,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define TCG_CT_CONST_N12 0x400
#define TCG_CT_CONST_M12 0x800
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
/*
* For softmmu, we need to avoid conflicts with the first 5
* argument registers to call the helper. Some of these are
* also used for the tlb lookup.
*/
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
#define sextreg sextract64
@@ -899,10 +889,6 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
#endif
};
/* We expect to use a 12-bit negative offset from ENV. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
@@ -910,84 +896,14 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
tcg_debug_assert(ok);
}
static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi,
tcg_insn_unit **label_ptr, bool is_load)
{
MemOp opc = get_memop(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
tcg_target_long compare_mask;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
offsetof(CPUTLBEntry, addend));
/* We don't support unaligned accesses. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
/* Clear the non-page, non-alignment bits from the address. */
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
if (compare_mask == sextreg(compare_mask, 0, 12)) {
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask);
} else {
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr);
}
/* Compare masked address with the TLB entry. */
label_ptr[0] = s->code_ptr;
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
/* TLB Hit - translate address using addend. */
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_TMP0, addr);
addr = TCG_REG_TMP0;
}
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr);
return TCG_REG_TMP0;
}
static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
TCGType data_type, TCGReg data_reg,
TCGReg addr_reg, void *raddr,
tcg_insn_unit **label_ptr)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->oi = oi;
label->type = data_type;
label->datalo_reg = data_reg;
label->addrlo_reg = addr_reg;
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr[0];
}
/* We have three temps, we might as well expose them. */
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
};
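/*
 * Editor's note (illustrative, not part of this patch): exposing all
 * three temporaries lets the common tcg_out_{ld,st}_helper_args code
 * resolve overlaps between the guest address/data registers and the
 * call argument registers, and extend values bound for stack slots,
 * without this backend reserving anything extra.
 */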
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
TCGReg a0 = tcg_target_call_iarg_regs[0];
TCGReg a1 = tcg_target_call_iarg_regs[1];
TCGReg a2 = tcg_target_call_iarg_regs[2];
TCGReg a3 = tcg_target_call_iarg_regs[3];
MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -995,13 +911,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
/* call load helper */
tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
tcg_out_goto(s, l->raddr);
return true;
@@ -1009,14 +921,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
TCGReg a0 = tcg_target_call_iarg_regs[0];
TCGReg a1 = tcg_target_call_iarg_regs[1];
TCGReg a2 = tcg_target_call_iarg_regs[2];
TCGReg a3 = tcg_target_call_iarg_regs[3];
TCGReg a4 = tcg_target_call_iarg_regs[4];
MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -1024,39 +929,13 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
/* call store helper */
tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2,
l->type, s_bits, l->datalo_reg);
tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
tcg_out_st_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
tcg_out_goto(s, l->raddr);
return true;
}
#else
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
unsigned a_bits)
{
unsigned a_mask = (1 << a_bits) - 1;
TCGLabelQemuLdst *l = new_ldst_label(s);
l->is_ld = is_ld;
l->addrlo_reg = addr_reg;
/* We are expecting a_bits to max out at 7, so we can always use andi. */
tcg_debug_assert(a_bits < 12);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
l->label_ptr[0] = s->code_ptr;
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
l->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@@ -1083,9 +962,108 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* CONFIG_SOFTMMU */
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1u << a_bits) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
int mem_index = get_mmuidx(oi);
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
tcg_target_long compare_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
/* Load the tlb comparator and the addend. */
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
offsetof(CPUTLBEntry, addend));
/* We don't support unaligned accesses. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
/* Clear the non-page, non-alignment bits from the address. */
compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
if (compare_mask == sextreg(compare_mask, 0, 12)) {
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
} else {
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
}
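/*
 * Editor's note (illustrative, not part of this patch): the mask keeps
 * the page-number bits plus the low alignment bits.  With 4 KiB pages
 * and a_bits = 2, compare_mask is ~0xfff | 0x3 = ...fffff003, which as
 * a signed value lies outside ANDI's 12-bit immediate range, so the
 * constant is materialized in TMP1 and a register-register AND is used.
 */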
/* Compare masked address with the TLB entry. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
/* TLB Hit - translate address using addend. */
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
addr_reg = TCG_REG_TMP0;
}
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
*pbase = TCG_REG_TMP0;
#else
if (a_mask) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
/* We are expecting a_bits to max out at 7, so we can always use andi. */
tcg_debug_assert(a_bits < 12);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
ldst->label_ptr[0] = s->code_ptr;
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
}
TCGReg base = addr_reg;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_TMP0, base);
base = TCG_REG_TMP0;
}
if (guest_base != 0) {
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
base = TCG_REG_TMP0;
}
*pbase = base;
#endif
return ldst;
}
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
TCGReg base, MemOp opc, TCGType type)
{
@@ -1125,32 +1103,17 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
TCGReg base;
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[1];
ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, true, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
base = addr_reg;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_TMP0, base);
base = TCG_REG_TMP0;
}
if (guest_base != 0) {
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
base = TCG_REG_TMP0;
}
tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
@@ -1180,32 +1143,17 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
TCGReg base;
#if defined(CONFIG_SOFTMMU)
tcg_insn_unit *label_ptr[1];
ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
tcg_out_qemu_st_direct(s, data_reg, base, opc);
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, false, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
base = addr_reg;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_TMP0, base);
base = TCG_REG_TMP0;
}
if (guest_base != 0) {
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
base = TCG_REG_TMP0;
}
tcg_out_qemu_st_direct(s, data_reg, base, opc);
#endif
}
static const tcg_insn_unit *tb_ret_addr;
@@ -1642,10 +1590,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
return C_O1_I1(r, L);
return C_O1_I1(r, r);
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
return C_O0_I2(LZ, L);
return C_O0_I2(rZ, r);
default:
g_assert_not_reached();


@@ -10,12 +10,10 @@
* tcg-target-con-str.h; the constraint combination is inclusive or.
*/
C_O0_I1(r)
C_O0_I2(L, L)
C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(r, rA)
C_O0_I2(v, r)
C_O1_I1(r, L)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)


@@ -9,7 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
REGS('v', ALL_VECTOR_REGS)
REGS('o', 0xaaaa) /* odd numbered general regs */


@@ -44,18 +44,6 @@
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
/*
* For softmmu, we need to avoid conflicts with the first 3
* argument registers to perform the tlb lookup, and to call
* the helper function.
*/
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
/* Several places within the instruction set 0 means "no register"
rather than TCG_REG_R0. */
#define TCG_REG_NONE 0
@@ -149,6 +137,7 @@ typedef enum S390Opcode {
RRE_ALGR = 0xb90a,
RRE_ALCR = 0xb998,
RRE_ALCGR = 0xb988,
RRE_ALGFR = 0xb91a,
RRE_CGR = 0xb920,
RRE_CLGR = 0xb921,
RRE_DLGR = 0xb987,
@@ -1718,98 +1707,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
}
#if defined(CONFIG_SOFTMMU)
/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
addend into R2. Returns a register with the sanitized guest address. */
static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
int mem_index, bool is_ld)
{
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
unsigned s_mask = (1 << s_bits) - 1;
unsigned a_mask = (1 << a_bits) - 1;
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
int ofs, a_off;
uint64_t tlb_mask;
tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
/* For aligned accesses, we check the first byte and include the alignment
bits within the address. For unaligned access, we check that we don't
cross pages using the address of the last byte of the access. */
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
if (a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
} else {
tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
}
if (is_ld) {
ofs = offsetof(CPUTLBEntry, addr_read);
} else {
ofs = offsetof(CPUTLBEntry, addr_write);
}
if (TARGET_LONG_BITS == 32) {
tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
} else {
tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
}
tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
offsetof(CPUTLBEntry, addend));
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
return TCG_REG_R3;
}
return addr_reg;
}
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
TCGType type, TCGReg data, TCGReg addr,
tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
TCGLabelQemuLdst *label = new_ldst_label(s);
label->is_ld = is_ld;
label->oi = oi;
label->type = type;
label->datalo_reg = data;
label->addrlo_reg = addr;
label->raddr = tcg_splitwx_to_rx(raddr);
label->label_ptr[0] = label_ptr;
}
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_TMP0 }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
TCGReg addr_reg = lb->addrlo_reg;
TCGReg data_reg = lb->datalo_reg;
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
MemOp opc = get_memop(lb->oi);
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
if (TARGET_LONG_BITS == 64) {
tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
}
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;
@@ -1817,51 +1730,20 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
TCGReg addr_reg = lb->addrlo_reg;
TCGReg data_reg = lb->datalo_reg;
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
MemOp size = opc & MO_SIZE;
MemOp opc = get_memop(lb->oi);
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
if (TARGET_LONG_BITS == 64) {
tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
}
tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
TCG_REG_R4, lb->type, size, data_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;
}
#else
static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
TCGReg addrlo, unsigned a_bits)
{
unsigned a_mask = (1 << a_bits) - 1;
TCGLabelQemuLdst *l = new_ldst_label(s);
l->is_ld = is_ld;
l->addrlo_reg = addrlo;
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
tcg_debug_assert(a_bits < 16);
tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
l->label_ptr[0] = s->code_ptr;
s->code_ptr += 1;
l->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
@@ -1888,91 +1770,147 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
return tcg_out_fail_alignment(s, l);
}
#endif /* CONFIG_SOFTMMU */
static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
/*
* For softmmu, perform the TLB load and compare.
* For useronly, perform any required alignment tests.
* In both cases, return a TCGLabelQemuLdst structure if the slow path
* is required and fill in @h with the host address for the fast path.
*/
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGReg addr_reg, MemOpIdx oi,
bool is_ld)
{
TCGReg index;
int disp;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
unsigned a_bits = get_alignment_bits(opc);
unsigned a_mask = (1u << a_bits) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
int ofs, a_off;
uint64_t tlb_mask;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
/*
* For aligned accesses, we check the first byte and include the alignment
* bits within the address. For unaligned access, we check that we don't
* cross pages using the address of the last byte of the access.
*/
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
if (a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
} else {
tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask);
}
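/*
 * Editor's note (illustrative, not part of this patch): a_off is the
 * offset of the last byte not already covered by the alignment check.
 * For an 8-byte access with a_bits = 0, LA computes addr + 7 before
 * masking, so an access crossing the page boundary changes the page
 * number and the TLB compare below fails.
 */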
if (is_ld) {
ofs = offsetof(CPUTLBEntry, addr_read);
} else {
ofs = offsetof(CPUTLBEntry, addr_write);
}
if (TARGET_LONG_BITS == 32) {
tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
} else {
tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
}
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
ldst->label_ptr[0] = s->code_ptr++;
h->index = TCG_TMP0;
tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
offsetof(CPUTLBEntry, addend));
if (TARGET_LONG_BITS == 32) {
tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
h->base = TCG_REG_NONE;
} else {
h->base = addr_reg;
}
h->disp = 0;
#else
if (a_mask) {
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
tcg_debug_assert(a_bits < 16);
tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
ldst->label_ptr[0] = s->code_ptr++;
}
h->base = addr_reg;
if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_TMP0, addr_reg);
addr_reg = TCG_TMP0;
h->base = TCG_TMP0;
}
if (guest_base < 0x80000) {
index = TCG_REG_NONE;
disp = guest_base;
h->index = TCG_REG_NONE;
h->disp = guest_base;
} else {
index = TCG_GUEST_BASE_REG;
disp = 0;
h->index = TCG_GUEST_BASE_REG;
h->disp = 0;
}
return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
#endif
return ldst;
}
#endif /* CONFIG_SOFTMMU */
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
unsigned mem_index = get_mmuidx(oi);
tcg_insn_unit *label_ptr;
ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
h.index = TCG_REG_R2;
h.disp = 0;
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
label_ptr = s->code_ptr;
s->code_ptr += 1;
tcg_out_qemu_ld_direct(s, opc, data_reg, h);
add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, true, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
h = tcg_prepare_user_ldst(s, addr_reg);
tcg_out_qemu_ld_direct(s, opc, data_reg, h);
#endif
}
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
TCGLabelQemuLdst *ldst;
HostAddress h;
#ifdef CONFIG_SOFTMMU
unsigned mem_index = get_mmuidx(oi);
tcg_insn_unit *label_ptr;
ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
h.index = TCG_REG_R2;
h.disp = 0;
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
label_ptr = s->code_ptr;
s->code_ptr += 1;
tcg_out_qemu_st_direct(s, opc, data_reg, h);
add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
s->code_ptr, label_ptr);
#else
unsigned a_bits = get_alignment_bits(opc);
if (a_bits) {
tcg_out_test_alignment(s, false, addr_reg, a_bits);
if (ldst) {
ldst->type = data_type;
ldst->datalo_reg = data_reg;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
h = tcg_prepare_user_ldst(s, addr_reg);
tcg_out_qemu_st_direct(s, opc, data_reg, h);
#endif
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
@@ -3205,10 +3143,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
return C_O1_I1(r, L);
return C_O1_I1(r, r);
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_i32:
return C_O0_I2(L, L);
return C_O0_I2(r, r);
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:

tcg/tcg.c

@@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
typedef struct TCGLdstHelperParam {
TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
unsigned ntmp;
int tmp[3];
} TCGLdstHelperParam;
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
const TCGLdstHelperParam *p)
__attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
bool load_sign, const TCGLdstHelperParam *p)
__attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
const TCGLdstHelperParam *p)
__attribute__((unused));
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
@@ -366,8 +382,17 @@ void tcg_raise_tb_overflow(TCGContext *s)
siglongjmp(s->jmp_trans, -2);
}
/*
* Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
* By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
*
* However, tcg_out_helper_load_slots reuses this field to hold an
* argument slot number (which may designate an argument register or an
* argument stack slot), converting to TCGReg once all arguments that
* are destined for the stack are processed.
*/
typedef struct TCGMovExtend {
TCGReg dst;
unsigned dst;
TCGReg src;
TCGType dst_type;
TCGType src_type;
@@ -459,9 +484,8 @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
* between the sources and destinations.
*/
static void __attribute__((unused))
tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
const TCGMovExtend *i2, int scratch)
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
const TCGMovExtend *i2, int scratch)
{
TCGReg src1 = i1->src;
TCGReg src2 = i2->src;
@@ -715,6 +739,58 @@ static TCGHelperInfo all_helpers[] = {
};
static GHashTable *helper_table;
/*
* Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
* akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
* We only use these for layout in tcg_out_ld_helper_ret and
* tcg_out_st_helper_args, and share them between several of
* the helpers, with the end result that it's easier to build manually.
*/
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl dh_typecode_i32
#else
# define dh_typecode_ttl dh_typecode_i64
#endif
static TCGHelperInfo info_helper_ld32_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
static TCGHelperInfo info_helper_ld64_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(i64, 0) /* return uint64_t */
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i32, 3) /* unsigned oi */
| dh_typemask(ptr, 4) /* uintptr_t ra */
};
static TCGHelperInfo info_helper_st32_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i32, 3) /* uint32_t data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
};
static TCGHelperInfo info_helper_st64_mmu = {
.flags = TCG_CALL_NO_WG,
.typemask = dh_typemask(void, 0)
| dh_typemask(env, 1)
| dh_typemask(tl, 2) /* target_ulong addr */
| dh_typemask(i64, 3) /* uint64_t data */
| dh_typemask(i32, 4) /* unsigned oi */
| dh_typemask(ptr, 5) /* uintptr_t ra */
};
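/*
 * Editor's note (illustrative, not part of this patch): these typemasks
 * describe call layouts of the shape
 *     ret helper(env, target_ulong addr, [data,] uint32_t oi, uintptr_t ra)
 * i.e. the "tcg/tcg-ldst.h" load/store helpers, so the argument-slot
 * assignment computed by init_call_layout() can be reused when the
 * backends marshal slow-path arguments by hand.
 */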
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
@@ -1126,6 +1202,11 @@ static void tcg_context_init(unsigned max_cpus)
(gpointer)&all_helpers[i]);
}
init_call_layout(&info_helper_ld32_mmu);
init_call_layout(&info_helper_ld64_mmu);
init_call_layout(&info_helper_st32_mmu);
init_call_layout(&info_helper_st64_mmu);
#ifdef CONFIG_TCG_INTERPRETER
init_ffi_layouts();
#endif
@@ -2104,13 +2185,8 @@ static const char * const ldst_name[] =
};
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
[MO_UNALN >> MO_ASHIFT] = "un+",
[MO_ALIGN >> MO_ASHIFT] = "",
#else
[MO_UNALN >> MO_ASHIFT] = "",
[MO_ALIGN >> MO_ASHIFT] = "al+",
#endif
[MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
[MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
[MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
@@ -5011,6 +5087,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
}
}
/*
* Similarly for qemu_ld/st slow path helpers.
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
* using only the provided backend tcg_out_* functions.
*/
static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
{
int ofs = arg_slot_stk_ofs(slot);
/*
* Each stack slot is TCG_TARGET_LONG_BITS. If the host does not
* require extension to uint64_t, adjust the address for uint32_t.
*/
if (HOST_BIG_ENDIAN &&
TCG_TARGET_REG_BITS == 64 &&
type == TCG_TYPE_I32) {
ofs += 4;
}
return ofs;
}
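/*
 * Editor's note (illustrative, not part of this patch): on a 64-bit
 * big-endian host each stack slot is 8 bytes wide and a 32-bit value
 * occupies the high-addressed half of its slot, hence the +4 above;
 * on a little-endian host the value starts at the slot base and no
 * adjustment is needed.
 */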
static void tcg_out_helper_load_regs(TCGContext *s,
unsigned nmov, TCGMovExtend *mov,
unsigned ntmp, const int *tmp)
{
switch (nmov) {
default:
/* The backend must have provided enough temps for the worst case. */
tcg_debug_assert(ntmp + 1 >= nmov);
for (unsigned i = nmov - 1; i >= 2; --i) {
TCGReg dst = mov[i].dst;
for (unsigned j = 0; j < i; ++j) {
if (dst == mov[j].src) {
/*
* Conflict.
* Copy the source to a temporary, recurse for the
* remaining moves, perform the extension from our
* scratch on the way out.
*/
TCGReg scratch = tmp[--ntmp];
tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
mov[i].src = scratch;
tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
tcg_out_movext1(s, &mov[i]);
return;
}
}
/* No conflicts: perform this move and continue. */
tcg_out_movext1(s, &mov[i]);
}
/* fall through for the final two moves */
case 2:
tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
return;
case 1:
tcg_out_movext1(s, mov);
return;
case 0:
g_assert_not_reached();
}
}
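/*
 * Editor's note (illustrative, not part of this patch): for the cyclic
 * moves a <- b, b <- c, c <- a, the loop above finds that the
 * destination of the last move (c) is the source of an earlier one,
 * copies a into a backend-provided scratch, emits the remaining pair
 * via tcg_out_movext2, and finally fills c from the scratch on the
 * way out.
 */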
static void tcg_out_helper_load_slots(TCGContext *s,
unsigned nmov, TCGMovExtend *mov,
const TCGLdstHelperParam *parm)
{
unsigned i;
/*
* Start from the end, storing to the stack first.
* This frees those registers, so we need not consider overlap.
*/
for (i = nmov; i-- > 0; ) {
unsigned slot = mov[i].dst;
if (arg_slot_reg_p(slot)) {
goto found_reg;
}
TCGReg src = mov[i].src;
TCGType dst_type = mov[i].dst_type;
MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
/* The argument is going onto the stack; extend into scratch. */
if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
tcg_debug_assert(parm->ntmp != 0);
mov[i].dst = src = parm->tmp[0];
tcg_out_movext1(s, &mov[i]);
}
tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
tcg_out_helper_stk_ofs(dst_type, slot));
}
return;
found_reg:
/*
* The remaining arguments are in registers.
* Convert slot numbers to argument registers.
*/
nmov = i + 1;
for (i = 0; i < nmov; ++i) {
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
}
tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
}
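/*
 * Editor's note (illustrative, not part of this patch): on a 32-bit
 * host calling the 64-bit store helper, the argument list may spill
 * onto the stack; the stack-bound slots are stored first, freeing
 * their source registers, and the remaining slots are then renamed to
 * real argument registers and handled by tcg_out_helper_load_regs.
 */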
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
TCGType type, tcg_target_long imm,
const TCGLdstHelperParam *parm)
{
if (arg_slot_reg_p(slot)) {
tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
} else {
int ofs = tcg_out_helper_stk_ofs(type, slot);
if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
tcg_debug_assert(parm->ntmp != 0);
tcg_out_movi(s, type, parm->tmp[0], imm);
tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
}
}
}
static void tcg_out_helper_load_common_args(TCGContext *s,
const TCGLabelQemuLdst *ldst,
const TCGLdstHelperParam *parm,
const TCGHelperInfo *info,
unsigned next_arg)
{
TCGMovExtend ptr_mov = {
.dst_type = TCG_TYPE_PTR,
.src_type = TCG_TYPE_PTR,
.src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
};
const TCGCallArgumentLoc *loc = &info->in[0];
TCGType type;
unsigned slot;
tcg_target_ulong imm;
/*
* Handle env, which is always first.
*/
ptr_mov.dst = loc->arg_slot;
ptr_mov.src = TCG_AREG0;
tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
/*
* Handle oi.
*/
imm = ldst->oi;
loc = &info->in[next_arg];
type = TCG_TYPE_I32;
switch (loc->kind) {
case TCG_CALL_ARG_NORMAL:
break;
case TCG_CALL_ARG_EXTEND_U:
case TCG_CALL_ARG_EXTEND_S:
/* No extension required for MemOpIdx. */
tcg_debug_assert(imm <= INT32_MAX);
type = TCG_TYPE_REG;
break;
default:
g_assert_not_reached();
}
tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
next_arg++;
/*
* Handle ra.
*/
loc = &info->in[next_arg];
slot = loc->arg_slot;
if (parm->ra_gen) {
int arg_reg = -1;
TCGReg ra_reg;
if (arg_slot_reg_p(slot)) {
arg_reg = tcg_target_call_iarg_regs[slot];
}
ra_reg = parm->ra_gen(s, ldst, arg_reg);
ptr_mov.dst = slot;
ptr_mov.src = ra_reg;
tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
} else {
imm = (uintptr_t)ldst->raddr;
tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
}
}
static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
const TCGCallArgumentLoc *loc,
TCGType dst_type, TCGType src_type,
TCGReg lo, TCGReg hi)
{
if (dst_type <= TCG_TYPE_REG) {
MemOp src_ext;
switch (loc->kind) {
case TCG_CALL_ARG_NORMAL:
src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
break;
case TCG_CALL_ARG_EXTEND_U:
dst_type = TCG_TYPE_REG;
src_ext = MO_UL;
break;
case TCG_CALL_ARG_EXTEND_S:
dst_type = TCG_TYPE_REG;
src_ext = MO_SL;
break;
default:
g_assert_not_reached();
}
mov[0].dst = loc->arg_slot;
mov[0].dst_type = dst_type;
mov[0].src = lo;
mov[0].src_type = src_type;
mov[0].src_ext = src_ext;
return 1;
}
assert(TCG_TARGET_REG_BITS == 32);
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
mov[0].src = lo;
mov[0].dst_type = TCG_TYPE_I32;
mov[0].src_type = TCG_TYPE_I32;
mov[0].src_ext = MO_32;
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
mov[1].src = hi;
mov[1].dst_type = TCG_TYPE_I32;
mov[1].src_type = TCG_TYPE_I32;
mov[1].src_ext = MO_32;
return 2;
}
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
const TCGLdstHelperParam *parm)
{
const TCGHelperInfo *info;
const TCGCallArgumentLoc *loc;
TCGMovExtend mov[2];
unsigned next_arg, nmov;
MemOp mop = get_memop(ldst->oi);
switch (mop & MO_SIZE) {
case MO_8:
case MO_16:
case MO_32:
info = &info_helper_ld32_mmu;
break;
case MO_64:
info = &info_helper_ld64_mmu;
break;
default:
g_assert_not_reached();
}
/* Defer env argument. */
next_arg = 1;
loc = &info->in[next_arg];
nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
ldst->addrlo_reg, ldst->addrhi_reg);
next_arg += nmov;
tcg_out_helper_load_slots(s, nmov, mov, parm);
/* No special attention for 32 and 64-bit return values. */
tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
bool load_sign,
const TCGLdstHelperParam *parm)
{
TCGMovExtend mov[2];
if (ldst->type <= TCG_TYPE_REG) {
MemOp mop = get_memop(ldst->oi);
mov[0].dst = ldst->datalo_reg;
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
mov[0].dst_type = ldst->type;
mov[0].src_type = TCG_TYPE_REG;
/*
* If load_sign, then we allowed the helper to perform the
* appropriate sign extension to tcg_target_ulong, and all
* we need now is a plain move.
*
* If not, then we expect the relevant extension
* instruction to be no more expensive than a move, and
* we thus save the icache etc by only using one of two
* helper functions.
*/
if (load_sign || !(mop & MO_SIGN)) {
if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
mov[0].src_ext = MO_32;
} else {
mov[0].src_ext = MO_64;
}
} else {
mov[0].src_ext = mop & MO_SSIZE;
}
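/*
 * Editor's note (illustrative, not part of this patch): for a signed
 * byte load with load_sign clear, src_ext becomes MO_SB and the sign
 * extension is emitted inline here; with load_sign set, the helper has
 * already returned a sign-extended tcg_target_ulong and only a plain
 * move is needed.
 */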
tcg_out_movext1(s, mov);
} else {
assert(TCG_TARGET_REG_BITS == 32);
mov[0].dst = ldst->datalo_reg;
mov[0].src =
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
mov[0].dst_type = TCG_TYPE_I32;
mov[0].src_type = TCG_TYPE_I32;
mov[0].src_ext = MO_32;
mov[1].dst = ldst->datahi_reg;
mov[1].src =
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
mov[1].dst_type = TCG_TYPE_REG;
mov[1].src_type = TCG_TYPE_REG;
mov[1].src_ext = MO_32;
tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
}
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
const TCGLdstHelperParam *parm)
{
const TCGHelperInfo *info;
const TCGCallArgumentLoc *loc;
TCGMovExtend mov[4];
TCGType data_type;
unsigned next_arg, nmov, n;
MemOp mop = get_memop(ldst->oi);
switch (mop & MO_SIZE) {
case MO_8:
case MO_16:
case MO_32:
info = &info_helper_st32_mmu;
data_type = TCG_TYPE_I32;
break;
case MO_64:
info = &info_helper_st64_mmu;
data_type = TCG_TYPE_I64;
break;
default:
g_assert_not_reached();
}
/* Defer env argument. */
next_arg = 1;
nmov = 0;
/* Handle addr argument. */
loc = &info->in[next_arg];
n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
ldst->addrlo_reg, ldst->addrhi_reg);
next_arg += n;
nmov += n;
/* Handle data argument. */
loc = &info->in[next_arg];
n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
ldst->datalo_reg, ldst->datahi_reg);
next_arg += n;
nmov += n;
tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
tcg_out_helper_load_slots(s, nmov, mov, parm);
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
#ifdef CONFIG_PROFILER
/* avoid copy/paste errors */


@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/lockable.h"
#include "cpu.h"
#include "trace/trace-root.h"
#include "trace/control.h"
@@ -116,11 +117,9 @@ static bool adding_first_cpu1(void)
static bool adding_first_cpu(void)
{
bool res;
cpu_list_lock();
res = adding_first_cpu1();
cpu_list_unlock();
return res;
QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
return adding_first_cpu1();
}
void trace_init_vcpu(CPUState *vcpu)