diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 16e4fe3ccd..bdfa036ac8 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -245,11 +245,11 @@ static void cpu_exec_exit(CPUState *cpu) void cpu_exec_step_atomic(CPUState *cpu) { + CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; - uint32_t cflags = 1; - uint32_t cf_mask = cflags & CF_HASH_MASK; + uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1; int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -258,15 +258,15 @@ void cpu_exec_step_atomic(CPUState *cpu) g_assert(!cpu->running); cpu->running = true; - tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + tb = tb_lookup(cpu, pc, cs_base, flags, cflags); + if (tb == NULL) { mmap_lock(); tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); mmap_unlock(); } - /* Since we got here, we know that parallel_cpus must be true. */ - parallel_cpus = false; cpu_exec_enter(cpu); /* execute the generated code */ trace_exec_tb(tb, pc); @@ -294,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu) * the execution. */ g_assert(cpu_in_exclusive_context(cpu)); - parallel_cpus = true; cpu->running = false; end_exclusive(); } @@ -305,7 +304,7 @@ struct tb_desc { CPUArchState *env; tb_page_addr_t phys_page1; uint32_t flags; - uint32_t cf_mask; + uint32_t cflags; uint32_t trace_vcpu_dstate; }; @@ -319,7 +318,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) tb->cs_base == desc->cs_base && tb->flags == desc->flags && tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && - (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) { + tb_cflags(tb) == desc->cflags) { /* check next page if needed */ if (tb->page_addr[1] == -1) { return true; @@ -339,7 +338,7 @@ static bool tb_lookup_cmp(const void *p, const void *d) TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, - uint32_t cf_mask) + uint32_t cflags) { tb_page_addr_t phys_pc; struct tb_desc desc; @@ -348,7 +347,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, desc.env = (CPUArchState *)cpu->env_ptr; desc.cs_base = cs_base; desc.flags = flags; - desc.cf_mask = cf_mask; + desc.cflags = cflags; desc.trace_vcpu_dstate = *cpu->trace_dstate; desc.pc = pc; phys_pc = get_page_addr_code(desc.env, pc); @@ -356,7 +355,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, return NULL; } desc.phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate); + h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate); return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp); } @@ -416,16 +415,19 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, static inline TranslationBlock *tb_find(CPUState *cpu, TranslationBlock *last_tb, - int tb_exit, uint32_t cf_mask) + int tb_exit, uint32_t cflags) { + CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; - tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + + tb = tb_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); + tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); mmap_unlock(); /* We add the TB in the virtual pc hash table for the fast lookup */ qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); @@ -491,7 +493,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) if (replay_has_exception() && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { /* Execute just one insn to trigger exception pending in the log */ - cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1; + cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1; } #endif return false; @@ -788,7 +790,7 @@ int cpu_exec(CPUState *cpu) have CF_INVALID set, -1 is a convenient invalid value that does not require tcg headers for cpu_common_reset. */ if (cflags == -1) { - cflags = curr_cflags(); + cflags = curr_cflags(cpu); } else { cpu->cflags_next_tb = -1; } diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 42973fb062..847d2079d2 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu) char thread_name[VCPU_THREAD_NAME_SIZE]; g_assert(tcg_enabled()); - - parallel_cpus = (current_machine->smp.max_cpus > 1); + tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1); cpu->thread = g_malloc0(sizeof(QemuThread)); cpu->halt_cond = g_malloc0(sizeof(QemuCond)); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 4a66055e0d..018b54c508 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu) static QemuThread *single_tcg_cpu_thread; g_assert(tcg_enabled()); - parallel_cpus = false; + tcg_cpu_init_cflags(cpu, false); if (!single_tcg_cpu_thread) { cpu->thread = g_malloc0(sizeof(QemuThread)); diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 6144d9df87..6cdcaa2855 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -41,6 +41,14 @@ /* common functionality among all TCG variants */ +void tcg_cpu_init_cflags(CPUState *cpu, bool parallel) +{ + uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT; + cflags |= parallel ? CF_PARALLEL : 0; + cflags |= icount_enabled() ? CF_USE_ICOUNT : 0; + cpu->tcg_cflags = cflags; +} + void tcg_cpus_destroy(CPUState *cpu) { cpu_thread_signal_destroyed(cpu); diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h index 48130006de..6a5fcef889 100644 --- a/accel/tcg/tcg-accel-ops.h +++ b/accel/tcg/tcg-accel-ops.h @@ -17,5 +17,6 @@ void tcg_cpus_destroy(CPUState *cpu); int tcg_cpus_exec(CPUState *cpu); void tcg_handle_interrupt(CPUState *cpu, int mask); +void tcg_cpu_init_cflags(CPUState *cpu, bool parallel); #endif /* TCG_CPUS_H */ diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index d736f4ff55..49f5de37e8 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -27,10 +27,10 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" -#include "exec/tb-lookup.h" #include "disas/disas.h" #include "exec/log.h" #include "tcg/tcg.h" +#include "exec/tb-lookup.h" /* 32-bit helpers */ @@ -152,7 +152,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) target_ulong cs_base, pc; uint32_t flags; - tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags()); + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + + tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu)); if (tb == NULL) { return tcg_code_gen_epilogue; } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index bbd919a393..f32df8b240 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE]; TCGContext tcg_init_ctx; __thread TCGContext *tcg_ctx; TBContext tb_ctx; -bool parallel_cpus; static void page_table_config_init(void) { @@ -1311,7 +1310,7 @@ static bool tb_cmp(const void *ap, const void *bp) return a->pc == b->pc && a->cs_base == b->cs_base && a->flags == b->flags && - (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) && + (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && a->trace_vcpu_dstate == b->trace_vcpu_dstate && a->page_addr[0] == b->page_addr[0] && a->page_addr[1] == b->page_addr[1]; @@ -1616,6 +1615,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) PageDesc *p; uint32_t h; tb_page_addr_t phys_pc; + uint32_t orig_cflags = tb_cflags(tb); assert_memory_lock(); @@ -1626,7 +1626,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK, + h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags, tb->trace_vcpu_dstate); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; @@ -1793,6 +1793,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, uint32_t h; assert_memory_lock(); + tcg_debug_assert(!(tb->cflags & CF_INVALID)); /* * Add the TB to the page list, acquiring first the pages's locks. @@ -1811,7 +1812,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags, tb->trace_vcpu_dstate); qht_insert(&tb_ctx.htable, tb, h, &existing_tb); @@ -1865,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, cflags = (cflags & ~CF_COUNT_MASK) | 1; } - cflags &= ~CF_CLUSTER_MASK; - cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT; - max_insns = cflags & CF_COUNT_MASK; if (max_insns == 0) { max_insns = CF_COUNT_MASK; @@ -2194,7 +2192,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (current_tb_modified) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(); + cpu->cflags_next_tb = 1 | curr_cflags(cpu); mmap_unlock(); cpu_loop_exit_noexc(cpu); } @@ -2362,7 +2360,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(); + cpu->cflags_next_tb = 1 | curr_cflags(cpu); return true; } #endif @@ -2438,7 +2436,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) * operations only (which execute after completion) so we don't * double instrument the instruction. */ - cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n; + cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, "cpu_io_recompile: rewound execution of TB to " diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index b7b3c0ef12..6b036cae8f 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -448,9 +448,6 @@ struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ target_ulong cs_base; /* CS base for this block */ uint32_t flags; /* flags defining in which context the code was generated */ - uint16_t size; /* size of target code for this block (1 <= - size <= TARGET_PAGE_SIZE) */ - uint16_t icount; uint32_t cflags; /* compile flags */ #define CF_COUNT_MASK 0x00007fff #define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ @@ -460,12 +457,18 @@ struct TranslationBlock { #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 -/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */ -#define CF_HASH_MASK (~CF_INVALID) /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; + /* + * Above fields used for comparing + */ + + /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */ + uint16_t size; + uint16_t icount; + struct tb_tc tc; /* first and second physical page containing code. The lower bit @@ -510,8 +513,6 @@ struct TranslationBlock { uintptr_t jmp_dest[2]; }; -extern bool parallel_cpus; - /* Hide the qatomic_read to make code a little easier on the eyes */ static inline uint32_t tb_cflags(const TranslationBlock *tb) { @@ -519,10 +520,9 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb) } /* current cflags for hashing/comparison */ -static inline uint32_t curr_cflags(void) +static inline uint32_t curr_cflags(CPUState *cpu) { - return (parallel_cpus ? CF_PARALLEL : 0) - | (icount_enabled() ? CF_USE_ICOUNT : 0); + return cpu->tcg_cflags; } /* TranslationBlock invalidate API */ @@ -536,7 +536,7 @@ void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, - uint32_t cf_mask); + uint32_t cflags); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); /* GETPC is the true target of the return instruction that we'll execute. */ diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h index 9cf475bb03..29d61ceb34 100644 --- a/include/exec/tb-lookup.h +++ b/include/exec/tb-lookup.h @@ -17,30 +17,28 @@ #include "exec/tb-hash.h" /* Might cause an exception, so have a longjmp destination ready */ -static inline TranslationBlock * -tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, - uint32_t *flags, uint32_t cf_mask) +static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, + uint32_t flags, uint32_t cflags) { - CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; uint32_t hash; - cpu_get_tb_cpu_state(env, pc, cs_base, flags); - hash = tb_jmp_cache_hash_func(*pc); + /* we should never be trying to look up an INVALID tb */ + tcg_debug_assert(!(cflags & CF_INVALID)); + + hash = tb_jmp_cache_hash_func(pc); tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); - cf_mask &= ~CF_CLUSTER_MASK; - cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT; - if (likely(tb && - tb->pc == *pc && - tb->cs_base == *cs_base && - tb->flags == *flags && + tb->pc == pc && + tb->cs_base == cs_base && + tb->flags == flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && - (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) { + tb_cflags(tb) == cflags)) { return tb; } - tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask); + tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags); if (tb == NULL) { return NULL; } diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index c005d3dc2d..c68bc3ba8a 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -282,6 +282,7 @@ struct qemu_work_item; * to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will * be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER * QOM parent. + * @tcg_cflags: Pre-computed cflags for this cpu. * @nr_cores: Number of cores within this CPU package. * @nr_threads: Number of threads within this CPU. * @running: #true if CPU is currently running (lockless). @@ -412,6 +413,7 @@ struct CPUState { /* TODO Move common fields from CPUArchState here. */ int cpu_index; int cluster_index; + uint32_t tcg_cflags; uint32_t halted; uint32_t can_do_io; int32_t exception_index; diff --git a/linux-user/main.c b/linux-user/main.c index 81f48ff54e..4f4746dce8 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -205,6 +205,7 @@ CPUArchState *cpu_copy(CPUArchState *env) /* Reset non arch specific state */ cpu_reset(new_cpu); + new_cpu->tcg_cflags = cpu->tcg_cflags; memcpy(new_env, env, sizeof(CPUArchState)); /* Clone all break/watchpoints. diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c index cc89a48ff8..29c1ee30e6 100644 --- a/linux-user/sh4/signal.c +++ b/linux-user/sh4/signal.c @@ -82,9 +82,11 @@ static abi_ulong get_sigframe(struct target_sigaction *ka, return (sp - frame_size) & -8ul; } -/* Notice when we're in the middle of a gUSA region and reset. - Note that this will only occur for !parallel_cpus, as we will - translate such sequences differently in a parallel context. */ +/* + * Notice when we're in the middle of a gUSA region and reset. + * Note that this will only occur when #CF_PARALLEL is unset, as we + * will translate such sequences differently in a parallel context. + */ static void unwind_gusa(CPUSH4State *regs) { /* If the stack pointer is sufficiently negative, and we haven't diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 389ec09764..9522f603aa 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6481,6 +6481,16 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, /* Grab a mutex so that thread setup appears atomic. */ pthread_mutex_lock(&clone_lock); + /* + * If this is our first additional thread, we need to ensure we + * generate code for parallel execution and flush old translations. + * Do this now so that the copy gets CF_PARALLEL too. + */ + if (!(cpu->tcg_cflags & CF_PARALLEL)) { + cpu->tcg_cflags |= CF_PARALLEL; + tb_flush(cpu); + } + /* we create a new CPU instance. */ new_env = cpu_copy(env); /* Init regs that differ from the parent. */ @@ -6521,14 +6531,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask); cpu->random_seed = qemu_guest_random_seed_thread_part1(); - /* If this is our first additional thread, we need to ensure we - * generate code for parallel execution and flush old translations. - */ - if (!parallel_cpus) { - parallel_cpus = true; - tb_flush(cpu); - } - ret = pthread_create(&info.thread, &attr, clone_func, &info); /* TODO: Free new CPU state if thread creation failed. */ diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 19e0aa9836..7e8b0fab89 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -937,7 +937,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, cpu_loop_exit_restore(cpu, ra); } else { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(); + cpu->cflags_next_tb = 1 | curr_cflags(cpu); mmap_unlock(); if (ra) { cpu_restore_state(cpu, ra, true); diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 1376cdc404..fcaa5aface 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -519,6 +519,39 @@ typedef enum { I3606_BIC = 0x2f001400, I3606_ORR = 0x0f001400, + /* AdvSIMD scalar shift by immediate */ + I3609_SSHR = 0x5f000400, + I3609_SSRA = 0x5f001400, + I3609_SHL = 0x5f005400, + I3609_USHR = 0x7f000400, + I3609_USRA = 0x7f001400, + I3609_SLI = 0x7f005400, + + /* AdvSIMD scalar three same */ + I3611_SQADD = 0x5e200c00, + I3611_SQSUB = 0x5e202c00, + I3611_CMGT = 0x5e203400, + I3611_CMGE = 0x5e203c00, + I3611_SSHL = 0x5e204400, + I3611_ADD = 0x5e208400, + I3611_CMTST = 0x5e208c00, + I3611_UQADD = 0x7e200c00, + I3611_UQSUB = 0x7e202c00, + I3611_CMHI = 0x7e203400, + I3611_CMHS = 0x7e203c00, + I3611_USHL = 0x7e204400, + I3611_SUB = 0x7e208400, + I3611_CMEQ = 0x7e208c00, + + /* AdvSIMD scalar two-reg misc */ + I3612_CMGT0 = 0x5e208800, + I3612_CMEQ0 = 0x5e209800, + I3612_CMLT0 = 0x5e20a800, + I3612_ABS = 0x5e20b800, + I3612_CMGE0 = 0x7e208800, + I3612_CMLE0 = 0x7e209800, + I3612_NEG = 0x7e20b800, + /* AdvSIMD shift by immediate */ I3614_SSHR = 0x0f000400, I3614_SSRA = 0x0f001400, @@ -561,7 +594,7 @@ typedef enum { I3617_CMEQ0 = 0x0e209800, I3617_CMLT0 = 0x0e20a800, I3617_CMGE0 = 0x2e208800, - I3617_CMLE0 = 0x2e20a800, + I3617_CMLE0 = 0x2e209800, I3617_NOT = 0x2e205800, I3617_ABS = 0x0e20b800, I3617_NEG = 0x2e20b800, @@ -735,6 +768,25 @@ static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q, | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5); } +static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, unsigned immhb) +{ + tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f)); +} + +static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn, + unsigned size, TCGReg rd, TCGReg rn, TCGReg rm) +{ + tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16 + | (rn & 0x1f) << 5 | (rd & 0x1f)); +} + +static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn, + unsigned size, TCGReg rd, TCGReg rn) +{ + tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f)); +} + static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q, TCGReg rd, TCGReg rn, unsigned immhb) { @@ -1410,10 +1462,10 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, } } -static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, - TCGReg rh, TCGReg al, TCGReg ah, - tcg_target_long bl, tcg_target_long bh, - bool const_bl, bool const_bh, bool sub) +static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, + TCGReg rh, TCGReg al, TCGReg ah, + tcg_target_long bl, tcg_target_long bh, + bool const_bl, bool const_bh, bool sub) { TCGReg orig_rl = rl; AArch64Insn insn; @@ -1423,11 +1475,13 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, } if (const_bl) { - insn = I3401_ADDSI; - if ((bl < 0) ^ sub) { - insn = I3401_SUBSI; + if (bl < 0) { bl = -bl; + insn = sub ? I3401_ADDSI : I3401_SUBSI; + } else { + insn = sub ? I3401_SUBSI : I3401_ADDSI; } + if (unlikely(al == TCG_REG_XZR)) { /* ??? We want to allow al to be zero for the benefit of negation via subtraction. However, that leaves open the @@ -2234,23 +2288,38 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg *args, const int *const_args) { - static const AArch64Insn cmp_insn[16] = { + static const AArch64Insn cmp_vec_insn[16] = { [TCG_COND_EQ] = I3616_CMEQ, [TCG_COND_GT] = I3616_CMGT, [TCG_COND_GE] = I3616_CMGE, [TCG_COND_GTU] = I3616_CMHI, [TCG_COND_GEU] = I3616_CMHS, }; - static const AArch64Insn cmp0_insn[16] = { + static const AArch64Insn cmp_scalar_insn[16] = { + [TCG_COND_EQ] = I3611_CMEQ, + [TCG_COND_GT] = I3611_CMGT, + [TCG_COND_GE] = I3611_CMGE, + [TCG_COND_GTU] = I3611_CMHI, + [TCG_COND_GEU] = I3611_CMHS, + }; + static const AArch64Insn cmp0_vec_insn[16] = { [TCG_COND_EQ] = I3617_CMEQ0, [TCG_COND_GT] = I3617_CMGT0, [TCG_COND_GE] = I3617_CMGE0, [TCG_COND_LT] = I3617_CMLT0, [TCG_COND_LE] = I3617_CMLE0, }; + static const AArch64Insn cmp0_scalar_insn[16] = { + [TCG_COND_EQ] = I3612_CMEQ0, + [TCG_COND_GT] = I3612_CMGT0, + [TCG_COND_GE] = I3612_CMGE0, + [TCG_COND_LT] = I3612_CMLT0, + [TCG_COND_LE] = I3612_CMLE0, + }; TCGType type = vecl + TCG_TYPE_V64; unsigned is_q = vecl; + bool is_scalar = !is_q && vece == MO_64; TCGArg a0, a1, a2, a3; int cmode, imm8; @@ -2269,19 +2338,35 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, tcg_out_dupm_vec(s, type, vece, a0, a1, a2); break; case INDEX_op_add_vec: - tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); + } break; case INDEX_op_sub_vec: - tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2); + } break; case INDEX_op_mul_vec: tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2); break; case INDEX_op_neg_vec: - tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); + if (is_scalar) { + tcg_out_insn(s, 3612, NEG, vece, a0, a1); + } else { + tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); + } break; case INDEX_op_abs_vec: - tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); + if (is_scalar) { + tcg_out_insn(s, 3612, ABS, vece, a0, a1); + } else { + tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); + } break; case INDEX_op_and_vec: if (const_args[2]) { @@ -2335,16 +2420,32 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2); break; case INDEX_op_ssadd_vec: - tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2); + } break; case INDEX_op_sssub_vec: - tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2); + } break; case INDEX_op_usadd_vec: - tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2); + } break; case INDEX_op_ussub_vec: - tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2); + } break; case INDEX_op_smax_vec: tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2); @@ -2362,22 +2463,46 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); break; case INDEX_op_shli_vec: - tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); + if (is_scalar) { + tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece)); + } else { + tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); + } break; case INDEX_op_shri_vec: - tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); + if (is_scalar) { + tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2); + } else { + tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2); + } break; case INDEX_op_sari_vec: - tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); + if (is_scalar) { + tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2); + } else { + tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); + } break; case INDEX_op_aa64_sli_vec: - tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); + if (is_scalar) { + tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece)); + } else { + tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); + } break; case INDEX_op_shlv_vec: - tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); + } break; case INDEX_op_aa64_sshl_vec: - tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); + } break; case INDEX_op_cmp_vec: { @@ -2386,30 +2511,58 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, if (cond == TCG_COND_NE) { if (const_args[2]) { - tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); + if (is_scalar) { + tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1); + } else { + tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1); + } } else { - tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); + if (is_scalar) { + tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2); + } else { + tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2); + } tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0); } } else { if (const_args[2]) { - insn = cmp0_insn[cond]; - if (insn) { - tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); - break; + if (is_scalar) { + insn = cmp0_scalar_insn[cond]; + if (insn) { + tcg_out_insn_3612(s, insn, vece, a0, a1); + break; + } + } else { + insn = cmp0_vec_insn[cond]; + if (insn) { + tcg_out_insn_3617(s, insn, is_q, vece, a0, a1); + break; + } } tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); a2 = TCG_VEC_TMP; } - insn = cmp_insn[cond]; - if (insn == 0) { - TCGArg t; - t = a1, a1 = a2, a2 = t; - cond = tcg_swap_cond(cond); - insn = cmp_insn[cond]; - tcg_debug_assert(insn != 0); + if (is_scalar) { + insn = cmp_scalar_insn[cond]; + if (insn == 0) { + TCGArg t; + t = a1, a1 = a2, a2 = t; + cond = tcg_swap_cond(cond); + insn = cmp_scalar_insn[cond]; + tcg_debug_assert(insn != 0); + } + tcg_out_insn_3611(s, insn, vece, a0, a1, a2); + } else { + insn = cmp_vec_insn[cond]; + if (insn == 0) { + TCGArg t; + t = a1, a1 = a2, a2 = t; + cond = tcg_swap_cond(cond); + insn = cmp_vec_insn[cond]; + tcg_debug_assert(insn != 0); + } + tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); } - tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2); } } break; diff --git a/tcg/tcg.c b/tcg/tcg.c index 63a12b197b..2991112829 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -346,6 +346,12 @@ static void set_jmp_reset_offset(TCGContext *s, int which) s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); } +/* Signal overflow, starting over with fewer guest insns. */ +static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s) +{ + siglongjmp(s->jmp_trans, -2); +} + #define C_PFX1(P, A) P##A #define C_PFX2(P, A, B) P##A##_##B #define C_PFX3(P, A, B, C) P##A##_##B##_##C @@ -507,11 +513,21 @@ static void tcg_region_trees_init(void) } } -static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp) +static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p) { - void *p = tcg_splitwx_to_rw(cp); size_t region_idx; + /* + * Like tcg_splitwx_to_rw, with no assert. The pc may come from + * a signal handler over which the caller has no control. + */ + if (!in_code_gen_buffer(p)) { + p -= tcg_splitwx_diff; + if (!in_code_gen_buffer(p)) { + return NULL; + } + } + if (p < region.start_aligned) { region_idx = 0; } else { @@ -530,6 +546,7 @@ void tcg_tb_insert(TranslationBlock *tb) { struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); + g_assert(rt != NULL); qemu_mutex_lock(&rt->lock); g_tree_insert(rt->tree, &tb->tc, tb); qemu_mutex_unlock(&rt->lock); @@ -539,6 +556,7 @@ void tcg_tb_remove(TranslationBlock *tb) { struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); + g_assert(rt != NULL); qemu_mutex_lock(&rt->lock); g_tree_remove(rt->tree, &tb->tc); qemu_mutex_unlock(&rt->lock); @@ -555,6 +573,10 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) TranslationBlock *tb; struct tb_tc s = { .ptr = (void *)tc_ptr }; + if (rt == NULL) { + return NULL; + } + qemu_mutex_lock(&rt->lock); tb = g_tree_lookup(rt->tree, &s); qemu_mutex_unlock(&rt->lock); @@ -1310,8 +1332,7 @@ static TCGTemp *tcg_temp_alloc(TCGContext *s) int n = s->nb_temps++; if (n >= TCG_MAX_TEMPS) { - /* Signal overflow, starting over with fewer guest insns. */ - siglongjmp(s->jmp_trans, -2); + tcg_raise_tb_overflow(s); } return memset(&s->temps[n], 0, sizeof(TCGTemp)); } diff --git a/tcg/tci.c b/tcg/tci.c index fb3c97aaf1..3ccd30c39c 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -57,49 +57,6 @@ static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index) return regs[index]; } -#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 -static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index) -{ - return (int8_t)tci_read_reg(regs, index); -} -#endif - -#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 -static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index) -{ - return (int16_t)tci_read_reg(regs, index); -} -#endif - -#if TCG_TARGET_REG_BITS == 64 -static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index) -{ - return (int32_t)tci_read_reg(regs, index); -} -#endif - -static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index) -{ - return (uint8_t)tci_read_reg(regs, index); -} - -static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index) -{ - return (uint16_t)tci_read_reg(regs, index); -} - -static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index) -{ - return (uint32_t)tci_read_reg(regs, index); -} - -#if TCG_TARGET_REG_BITS == 64 -static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index) -{ - return tci_read_reg(regs, index); -} -#endif - static void tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value) { @@ -169,78 +126,20 @@ tci_read_r(const tcg_target_ulong *regs, const uint8_t **tb_ptr) return value; } -/* Read indexed register (8 bit) from bytecode. */ -static uint8_t tci_read_r8(const tcg_target_ulong *regs, const uint8_t **tb_ptr) -{ - uint8_t value = tci_read_reg8(regs, **tb_ptr); - *tb_ptr += 1; - return value; -} - -#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 -/* Read indexed register (8 bit signed) from bytecode. */ -static int8_t tci_read_r8s(const tcg_target_ulong *regs, const uint8_t **tb_ptr) -{ - int8_t value = tci_read_reg8s(regs, **tb_ptr); - *tb_ptr += 1; - return value; -} -#endif - -/* Read indexed register (16 bit) from bytecode. */ -static uint16_t tci_read_r16(const tcg_target_ulong *regs, - const uint8_t **tb_ptr) -{ - uint16_t value = tci_read_reg16(regs, **tb_ptr); - *tb_ptr += 1; - return value; -} - -#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 -/* Read indexed register (16 bit signed) from bytecode. */ -static int16_t tci_read_r16s(const tcg_target_ulong *regs, - const uint8_t **tb_ptr) -{ - int16_t value = tci_read_reg16s(regs, **tb_ptr); - *tb_ptr += 1; - return value; -} -#endif - -/* Read indexed register (32 bit) from bytecode. */ -static uint32_t tci_read_r32(const tcg_target_ulong *regs, - const uint8_t **tb_ptr) -{ - uint32_t value = tci_read_reg32(regs, **tb_ptr); - *tb_ptr += 1; - return value; -} - #if TCG_TARGET_REG_BITS == 32 /* Read two indexed registers (2 * 32 bit) from bytecode. */ static uint64_t tci_read_r64(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { - uint32_t low = tci_read_r32(regs, tb_ptr); - return tci_uint64(tci_read_r32(regs, tb_ptr), low); + uint32_t low = tci_read_r(regs, tb_ptr); + return tci_uint64(tci_read_r(regs, tb_ptr), low); } #elif TCG_TARGET_REG_BITS == 64 -/* Read indexed register (32 bit signed) from bytecode. */ -static int32_t tci_read_r32s(const tcg_target_ulong *regs, - const uint8_t **tb_ptr) -{ - int32_t value = tci_read_reg32s(regs, **tb_ptr); - *tb_ptr += 1; - return value; -} - /* Read indexed register (64 bit) from bytecode. */ static uint64_t tci_read_r64(const tcg_target_ulong *regs, const uint8_t **tb_ptr) { - uint64_t value = tci_read_reg64(regs, **tb_ptr); - *tb_ptr += 1; - return value; + return tci_read_r(regs, tb_ptr); } #endif @@ -346,51 +245,34 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) return result; } -#ifdef CONFIG_SOFTMMU -# define qemu_ld_ub \ - helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_ld_leuw \ - helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_ld_leul \ - helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_ld_leq \ - helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_ld_beuw \ - helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_ld_beul \ - helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_ld_beq \ - helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr) -# define qemu_st_b(X) \ - helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -# define qemu_st_lew(X) \ - helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -# define qemu_st_lel(X) \ - helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -# define qemu_st_leq(X) \ - helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -# define qemu_st_bew(X) \ - helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -# define qemu_st_bel(X) \ - helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -# define qemu_st_beq(X) \ - helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr) -#else -# define qemu_ld_ub ldub_p(g2h(taddr)) -# define qemu_ld_leuw lduw_le_p(g2h(taddr)) -# define qemu_ld_leul (uint32_t)ldl_le_p(g2h(taddr)) -# define qemu_ld_leq ldq_le_p(g2h(taddr)) -# define qemu_ld_beuw lduw_be_p(g2h(taddr)) -# define qemu_ld_beul (uint32_t)ldl_be_p(g2h(taddr)) -# define qemu_ld_beq ldq_be_p(g2h(taddr)) -# define qemu_st_b(X) stb_p(g2h(taddr), X) -# define qemu_st_lew(X) stw_le_p(g2h(taddr), X) -# define qemu_st_lel(X) stl_le_p(g2h(taddr), X) -# define qemu_st_leq(X) stq_le_p(g2h(taddr), X) -# define qemu_st_bew(X) stw_be_p(g2h(taddr), X) -# define qemu_st_bel(X) stl_be_p(g2h(taddr), X) -# define qemu_st_beq(X) stq_be_p(g2h(taddr), X) -#endif +#define qemu_ld_ub \ + cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_ld_leuw \ + cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_ld_leul \ + cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_ld_leq \ + cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_ld_beuw \ + cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_ld_beul \ + cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_ld_beq \ + cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_b(X) \ + cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_lew(X) \ + cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_lel(X) \ + cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_leq(X) \ + cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_bew(X) \ + cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_bel(X) \ + cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +#define qemu_st_beq(X) \ + cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) #if TCG_TARGET_REG_BITS == 64 # define CASE_32_64(x) \ @@ -483,8 +365,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, continue; case INDEX_op_setcond_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); condition = *tb_ptr++; tci_write_reg(regs, t0, tci_compare32(t1, t2, condition)); break; @@ -499,15 +381,15 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, #elif TCG_TARGET_REG_BITS == 64 case INDEX_op_setcond_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); condition = *tb_ptr++; tci_write_reg(regs, t0, tci_compare64(t1, t2, condition)); break; #endif - case INDEX_op_mov_i32: + CASE_32_64(mov) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, t1); break; case INDEX_op_tci_movi_i32: @@ -550,127 +432,130 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg(regs, t0, *(uint32_t *)(t1 + t2)); break; CASE_32_64(st8) - t0 = tci_read_r8(regs, &tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint8_t *)(t1 + t2) = t0; break; CASE_32_64(st16) - t0 = tci_read_r16(regs, &tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint16_t *)(t1 + t2) = t0; break; case INDEX_op_st_i32: CASE_64(st32) - t0 = tci_read_r32(regs, &tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint32_t *)(t1 + t2) = t0; break; - /* Arithmetic operations (32 bit). */ + /* Arithmetic operations (mixed 32/64 bit). */ - case INDEX_op_add_i32: + CASE_32_64(add) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, t1 + t2); break; - case INDEX_op_sub_i32: + CASE_32_64(sub) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, t1 - t2); break; - case INDEX_op_mul_i32: + CASE_32_64(mul) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, t1 * t2); break; + CASE_32_64(and) + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, t1 & t2); + break; + CASE_32_64(or) + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, t1 | t2); + break; + CASE_32_64(xor) + t0 = *tb_ptr++; + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, t1 ^ t2); + break; + + /* Arithmetic operations (32 bit). */ + case INDEX_op_div_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, (int32_t)t1 / (int32_t)t2); break; case INDEX_op_divu_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 / t2); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint32_t)t1 / (uint32_t)t2); break; case INDEX_op_rem_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, (int32_t)t1 % (int32_t)t2); break; case INDEX_op_remu_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 % t2); - break; - case INDEX_op_and_i32: - t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 & t2); - break; - case INDEX_op_or_i32: - t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 | t2); - break; - case INDEX_op_xor_i32: - t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 ^ t2); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint32_t)t1 % (uint32_t)t2); break; /* Shift/rotate operations (32 bit). */ case INDEX_op_shl_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 << (t2 & 31)); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint32_t)t1 << (t2 & 31)); break; case INDEX_op_shr_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 >> (t2 & 31)); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint32_t)t1 >> (t2 & 31)); break; case INDEX_op_sar_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, ((int32_t)t1 >> (t2 & 31))); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (int32_t)t1 >> (t2 & 31)); break; #if TCG_TARGET_HAS_rot_i32 case INDEX_op_rotl_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, rol32(t1, t2 & 31)); break; case INDEX_op_rotr_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, ror32(t1, t2 & 31)); break; #endif #if TCG_TARGET_HAS_deposit_i32 case INDEX_op_deposit_i32: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - t2 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tmp16 = *tb_ptr++; tmp8 = *tb_ptr++; tmp32 = (((1 << tmp8) - 1) << tmp16); @@ -678,8 +563,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, break; #endif case INDEX_op_brcond_i32: - t0 = tci_read_r32(regs, &tb_ptr); - t1 = tci_read_r32(regs, &tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); condition = *tb_ptr++; label = tci_read_label(&tb_ptr); if (tci_compare32(t0, t1, condition)) { @@ -717,73 +602,68 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, case INDEX_op_mulu2_i32: t0 = *tb_ptr++; t1 = *tb_ptr++; - t2 = tci_read_r32(regs, &tb_ptr); - tmp64 = tci_read_r32(regs, &tb_ptr); - tci_write_reg64(regs, t1, t0, t2 * tmp64); + t2 = tci_read_r(regs, &tb_ptr); + tmp64 = (uint32_t)tci_read_r(regs, &tb_ptr); + tci_write_reg64(regs, t1, t0, (uint32_t)t2 * tmp64); break; #endif /* TCG_TARGET_REG_BITS == 32 */ -#if TCG_TARGET_HAS_ext8s_i32 - case INDEX_op_ext8s_i32: +#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 + CASE_32_64(ext8s) t0 = *tb_ptr++; - t1 = tci_read_r8s(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); + t1 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (int8_t)t1); break; #endif -#if TCG_TARGET_HAS_ext16s_i32 - case INDEX_op_ext16s_i32: +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 + CASE_32_64(ext16s) t0 = *tb_ptr++; - t1 = tci_read_r16s(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); + t1 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (int16_t)t1); break; #endif -#if TCG_TARGET_HAS_ext8u_i32 - case INDEX_op_ext8u_i32: +#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64 + CASE_32_64(ext8u) t0 = *tb_ptr++; - t1 = tci_read_r8(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); + t1 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint8_t)t1); break; #endif -#if TCG_TARGET_HAS_ext16u_i32 - case INDEX_op_ext16u_i32: +#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64 + CASE_32_64(ext16u) t0 = *tb_ptr++; - t1 = tci_read_r16(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); + t1 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint16_t)t1); break; #endif -#if TCG_TARGET_HAS_bswap16_i32 - case INDEX_op_bswap16_i32: +#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 + CASE_32_64(bswap16) t0 = *tb_ptr++; - t1 = tci_read_r16(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, bswap16(t1)); break; #endif -#if TCG_TARGET_HAS_bswap32_i32 - case INDEX_op_bswap32_i32: +#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64 + CASE_32_64(bswap32) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, bswap32(t1)); break; #endif -#if TCG_TARGET_HAS_not_i32 - case INDEX_op_not_i32: +#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64 + CASE_32_64(not) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, ~t1); break; #endif -#if TCG_TARGET_HAS_neg_i32 - case INDEX_op_neg_i32: +#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64 + CASE_32_64(neg) t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, -t1); break; #endif #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_mov_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); - break; case INDEX_op_tci_movi_i64: t0 = *tb_ptr++; t1 = tci_read_i64(&tb_ptr); @@ -805,7 +685,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg(regs, t0, *(uint64_t *)(t1 + t2)); break; case INDEX_op_st_i64: - t0 = tci_read_r64(regs, &tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); t1 = tci_read_r(regs, &tb_ptr); t2 = tci_read_s32(&tb_ptr); *(uint64_t *)(t1 + t2) = t0; @@ -813,106 +693,70 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Arithmetic operations (64 bit). */ - case INDEX_op_add_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 + t2); - break; - case INDEX_op_sub_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 - t2); - break; - case INDEX_op_mul_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 * t2); - break; case INDEX_op_div_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, (int64_t)t1 / (int64_t)t2); break; case INDEX_op_divu_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, (uint64_t)t1 / (uint64_t)t2); break; case INDEX_op_rem_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, (int64_t)t1 % (int64_t)t2); break; case INDEX_op_remu_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, (uint64_t)t1 % (uint64_t)t2); break; - case INDEX_op_and_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 & t2); - break; - case INDEX_op_or_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 | t2); - break; - case INDEX_op_xor_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, t1 ^ t2); - break; /* Shift/rotate operations (64 bit). */ case INDEX_op_shl_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, t1 << (t2 & 63)); break; case INDEX_op_shr_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, t1 >> (t2 & 63)); break; case INDEX_op_sar_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, ((int64_t)t1 >> (t2 & 63))); break; #if TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, rol64(t1, t2 & 63)); break; case INDEX_op_rotr_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, ror64(t1, t2 & 63)); break; #endif #if TCG_TARGET_HAS_deposit_i64 case INDEX_op_deposit_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - t2 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); + t2 = tci_read_r(regs, &tb_ptr); tmp16 = *tb_ptr++; tmp8 = *tb_ptr++; tmp64 = (((1ULL << tmp8) - 1) << tmp16); @@ -920,8 +764,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, break; #endif case INDEX_op_brcond_i64: - t0 = tci_read_r64(regs, &tb_ptr); - t1 = tci_read_r64(regs, &tb_ptr); + t0 = tci_read_r(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); condition = *tb_ptr++; label = tci_read_label(&tb_ptr); if (tci_compare64(t0, t1, condition)) { @@ -930,85 +774,29 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, continue; } break; -#if TCG_TARGET_HAS_ext8u_i64 - case INDEX_op_ext8u_i64: - t0 = *tb_ptr++; - t1 = tci_read_r8(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); - break; -#endif -#if TCG_TARGET_HAS_ext8s_i64 - case INDEX_op_ext8s_i64: - t0 = *tb_ptr++; - t1 = tci_read_r8s(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); - break; -#endif -#if TCG_TARGET_HAS_ext16s_i64 - case INDEX_op_ext16s_i64: - t0 = *tb_ptr++; - t1 = tci_read_r16s(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); - break; -#endif -#if TCG_TARGET_HAS_ext16u_i64 - case INDEX_op_ext16u_i64: - t0 = *tb_ptr++; - t1 = tci_read_r16(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); - break; -#endif #if TCG_TARGET_HAS_ext32s_i64 case INDEX_op_ext32s_i64: #endif case INDEX_op_ext_i32_i64: t0 = *tb_ptr++; - t1 = tci_read_r32s(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); + t1 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (int32_t)t1); break; #if TCG_TARGET_HAS_ext32u_i64 case INDEX_op_ext32u_i64: #endif case INDEX_op_extu_i32_i64: t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, t1); + t1 = tci_read_r(regs, &tb_ptr); + tci_write_reg(regs, t0, (uint32_t)t1); break; -#if TCG_TARGET_HAS_bswap16_i64 - case INDEX_op_bswap16_i64: - t0 = *tb_ptr++; - t1 = tci_read_r16(regs, &tb_ptr); - tci_write_reg(regs, t0, bswap16(t1)); - break; -#endif -#if TCG_TARGET_HAS_bswap32_i64 - case INDEX_op_bswap32_i64: - t0 = *tb_ptr++; - t1 = tci_read_r32(regs, &tb_ptr); - tci_write_reg(regs, t0, bswap32(t1)); - break; -#endif #if TCG_TARGET_HAS_bswap64_i64 case INDEX_op_bswap64_i64: t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); + t1 = tci_read_r(regs, &tb_ptr); tci_write_reg(regs, t0, bswap64(t1)); break; #endif -#if TCG_TARGET_HAS_not_i64 - case INDEX_op_not_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, ~t1); - break; -#endif -#if TCG_TARGET_HAS_neg_i64 - case INDEX_op_neg_i64: - t0 = *tb_ptr++; - t1 = tci_read_r64(regs, &tb_ptr); - tci_write_reg(regs, t0, -t1); - break; -#endif #endif /* TCG_TARGET_REG_BITS == 64 */ /* QEMU specific operations. */ diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index feac4659cc..c79f9c32d8 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -380,6 +380,18 @@ static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) old_code_ptr[1] = s->code_ptr - old_code_ptr; } +#if TCG_TARGET_REG_BITS == 64 +# define CASE_32_64(x) \ + case glue(glue(INDEX_op_, x), _i64): \ + case glue(glue(INDEX_op_, x), _i32): +# define CASE_64(x) \ + case glue(glue(INDEX_op_, x), _i64): +#else +# define CASE_32_64(x) \ + case glue(glue(INDEX_op_, x), _i32): +# define CASE_64(x) +#endif + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -391,6 +403,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_exit_tb: tcg_out64(s, args[0]); break; + case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { /* Direct jump method. */ @@ -404,15 +417,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } set_jmp_reset_offset(s, args[0]); break; + case INDEX_op_br: tci_out_label(s, arg_label(args[0])); break; - case INDEX_op_setcond_i32: + + CASE_32_64(setcond) tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); tcg_out_r(s, args[2]); tcg_out8(s, args[3]); /* condition */ break; + #if TCG_TARGET_REG_BITS == 32 case INDEX_op_setcond2_i32: /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ @@ -423,60 +439,54 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_r(s, args[4]); tcg_out8(s, args[5]); /* condition */ break; -#elif TCG_TARGET_REG_BITS == 64 - case INDEX_op_setcond_i64: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out8(s, args[3]); /* condition */ - break; #endif - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: + + CASE_32_64(ld8u) + CASE_32_64(ld8s) + CASE_32_64(ld16u) + CASE_32_64(ld16s) case INDEX_op_ld_i32: - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: + CASE_64(ld32u) + CASE_64(ld32s) + CASE_64(ld) + CASE_32_64(st8) + CASE_32_64(st16) case INDEX_op_st_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: + CASE_64(st32) + CASE_64(st) stack_bounds_check(args[1], args[2]); tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); tcg_debug_assert(args[2] == (int32_t)args[2]); tcg_out32(s, args[2]); break; - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_mul_i32: - case INDEX_op_and_i32: - case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ - case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ - case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ - case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ - case INDEX_op_or_i32: - case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ - case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + + CASE_32_64(add) + CASE_32_64(sub) + CASE_32_64(mul) + CASE_32_64(and) + CASE_32_64(or) + CASE_32_64(xor) + CASE_32_64(andc) /* Optional (TCG_TARGET_HAS_andc_*). */ + CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */ + CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */ + CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ + CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ + CASE_32_64(shl) + CASE_32_64(shr) + CASE_32_64(sar) + CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ + CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ + CASE_32_64(div) /* Optional (TCG_TARGET_HAS_div_*). */ + CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */ + CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ + CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); tcg_out_r(s, args[2]); break; - case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ + + CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */ tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); tcg_out_r(s, args[2]); @@ -486,79 +496,30 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out8(s, args[4]); break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add_i64: - case INDEX_op_sub_i64: - case INDEX_op_mul_i64: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ - case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ - case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ - case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ - case INDEX_op_or_i64: - case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ - case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ - case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - break; - case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_debug_assert(args[3] <= UINT8_MAX); - tcg_out8(s, args[3]); - tcg_debug_assert(args[4] <= UINT8_MAX); - tcg_out8(s, args[4]); - break; - case INDEX_op_brcond_i64: + CASE_32_64(brcond) tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); tcg_out8(s, args[2]); /* condition */ tci_out_label(s, arg_label(args[3])); break; - case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ - case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ - case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ - case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ - case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ - case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ - case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ - case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ - case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ - case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ - case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: -#endif /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ - case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ - case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ - case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ - case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ - case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ - case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ - case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ + + CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */ + CASE_32_64(not) /* Optional (TCG_TARGET_HAS_not_*). */ + CASE_32_64(ext8s) /* Optional (TCG_TARGET_HAS_ext8s_*). */ + CASE_32_64(ext8u) /* Optional (TCG_TARGET_HAS_ext8u_*). */ + CASE_32_64(ext16s) /* Optional (TCG_TARGET_HAS_ext16s_*). */ + CASE_32_64(ext16u) /* Optional (TCG_TARGET_HAS_ext16u_*). */ + CASE_64(ext32s) /* Optional (TCG_TARGET_HAS_ext32s_i64). */ + CASE_64(ext32u) /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + CASE_64(ext_i32) + CASE_64(extu_i32) + CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */ + CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */ + CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */ tcg_out_r(s, args[0]); tcg_out_r(s, args[1]); break; - case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - break; + #if TCG_TARGET_REG_BITS == 32 case INDEX_op_add2_i32: case INDEX_op_sub2_i32: @@ -584,31 +545,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_r(s, args[3]); break; #endif - case INDEX_op_brcond_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out8(s, args[2]); /* condition */ - tci_out_label(s, arg_label(args[3])); - break; + case INDEX_op_qemu_ld_i32: - tcg_out_r(s, *args++); - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_r(s, *args++); - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_r(s, *args++); - } - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; case INDEX_op_qemu_st_i32: tcg_out_r(s, *args++); tcg_out_r(s, *args++); @@ -617,6 +555,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } tcg_out_i(s, *args++); break; + + case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: tcg_out_r(s, *args++); if (TCG_TARGET_REG_BITS == 32) { @@ -628,8 +568,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } tcg_out_i(s, *args++); break; + case INDEX_op_mb: break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */