This is the MTTCG pull-request as posted yesterday.

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJYsBZfAAoJEPvQ2wlanipElJ0H+QGoStSPeHrvKu7Q07v4F9zM
 Pvf05gRsaxvXl7UbwmXC4oKhvZf9rVJ6ITk0x/y0WvmK0mHCmNBWkC0nn5UFL5IH
 cdxetLz21Q+Ghpc36tZvqn2HYwRQFoEznge2LdtBDG0TyVA4jwquHU3HCG2D51zi
 BaImI6lYW1e4ejjZHw8cEInSxsj/HJZE4pPas2Tkci+uAnrJroErwBVRRcE/y/Tn
 aupl9TJFs2JdyJFNDibIm0kjB+i+jvCiLgYjbKZ/dR/+GZt73TtiBk/q9ZOFjdmT
 7YFPI3F46QbGHoZahtzh0Xt7WMj94SlQgQ9OJ3zmNMfpXrze6Yc78xo/nbQ33U0=
 =wR0/
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-240217-1' into staging

This is the MTTCG pull-request as posted yesterday.

# gpg: Signature made Fri 24 Feb 2017 11:17:51 GMT
# gpg:                using RSA key 0xFBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>"
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-mttcg-240217-1: (24 commits)
  tcg: enable MTTCG by default for ARM on x86 hosts
  hw/misc/imx6_src: defer clearing of SRC_SCR reset bits
  target-arm: ensure all cross vCPUs TLB flushes complete
  target-arm: don't generate WFE/YIELD calls for MTTCG
  target-arm/powerctl: defer cpu reset work to CPU context
  cputlb: introduce tlb_flush_*_all_cpus[_synced]
  cputlb: atomically update tlb fields used by tlb_reset_dirty
  cputlb: add tlb_flush_by_mmuidx async routines
  cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap
  cputlb: introduce tlb_flush_* async work.
  cputlb: tweak qemu_ram_addr_from_host_nofail reporting
  cputlb: add assert_cpu_is_self checks
  tcg: handle EXCP_ATOMIC exception for system emulation
  tcg: enable thread-per-vCPU
  tcg: enable tb_lock() for SoftMMU
  tcg: remove global exit_request
  tcg: drop global lock during TCG code execution
  tcg: rename tcg_current_cpu to tcg_current_rr_cpu
  tcg: add kick timer for single-threaded vCPU emulation
  tcg: add options for enabling MTTCG
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2017-02-25 18:43:52 +00:00
commit 28f997a82c
40 changed files with 1881 additions and 479 deletions

6
configure vendored
View file

@ -5879,6 +5879,7 @@ mkdir -p $target_dir
echo "# Automatically generated by configure - do not modify" > $config_target_mak
bflt="no"
mttcg="no"
interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g")
gdb_xml_files=""
@ -5897,11 +5898,13 @@ case "$target_name" in
arm|armeb)
TARGET_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
aarch64)
TARGET_BASE_ARCH=arm
bflt="yes"
mttcg="yes"
gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
cris)
@ -6066,6 +6069,9 @@ if test "$target_bigendian" = "yes" ; then
fi
if test "$target_softmmu" = "yes" ; then
echo "CONFIG_SOFTMMU=y" >> $config_target_mak
if test "$mttcg" = "yes" ; then
echo "TARGET_SUPPORTS_MTTCG=y" >> $config_target_mak
fi
fi
if test "$target_user_only" = "yes" ; then
echo "CONFIG_USER_ONLY=y" >> $config_target_mak

View file

@ -23,9 +23,6 @@
#include "exec/exec-all.h"
#include "exec/memory-internal.h"
bool exit_request;
CPUState *tcg_current_cpu;
/* exit the current TB, but without causing any exception to be raised */
void cpu_loop_exit_noexc(CPUState *cpu)
{

View file

@ -29,6 +29,7 @@
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@ -227,20 +228,43 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
static void cpu_exec_step(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
mmap_lock();
tb_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
tb_unlock();
mmap_unlock();
cc->cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
cc->cpu_exec_exit(cpu);
tb_lock();
tb_phys_invalidate(tb, -1);
tb_free(tb);
tb_unlock();
} else {
/* We may have exited due to another problem here, so we need
* to reset any tb_locks we may have taken but didn't release.
* The mmap_lock is dropped by tb_gen_code if it runs out of
* memory.
*/
#ifndef CONFIG_SOFTMMU
tcg_debug_assert(!have_mmap_lock());
#endif
tb_lock_reset();
}
}
void cpu_exec_step_atomic(CPUState *cpu)
@ -384,12 +408,13 @@ static inline bool cpu_handle_halt(CPUState *cpu)
if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
&& replay_interrupt()) {
X86CPU *x86_cpu = X86_CPU(cpu);
qemu_mutex_lock_iothread();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
qemu_mutex_unlock_iothread();
}
#endif
if (!cpu_has_work(cpu)) {
current_cpu = NULL;
return true;
}
@ -439,7 +464,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
qemu_mutex_unlock_iothread();
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
@ -465,9 +492,11 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int interrupt_request = cpu->interrupt_request;
if (unlikely(interrupt_request)) {
if (unlikely(atomic_read(&cpu->interrupt_request))) {
int interrupt_request;
qemu_mutex_lock_iothread();
interrupt_request = cpu->interrupt_request;
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@ -475,6 +504,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG;
qemu_mutex_unlock_iothread();
return true;
}
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
@ -484,6 +514,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
qemu_mutex_unlock_iothread();
return true;
}
#if defined(TARGET_I386)
@ -494,12 +525,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED;
qemu_mutex_unlock_iothread();
return true;
}
#else
else if (interrupt_request & CPU_INTERRUPT_RESET) {
replay_interrupt();
cpu_reset(cpu);
qemu_mutex_unlock_iothread();
return true;
}
#endif
@ -522,7 +555,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
the program flow was changed */
*last_tb = NULL;
}
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
qemu_mutex_unlock_iothread();
}
if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
@ -548,15 +586,13 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
*tb_exit = ret & TB_EXIT_MASK;
switch (*tb_exit) {
case TB_EXIT_REQUESTED:
/* Something asked us to stop executing
* chained TBs; just continue round the main
* loop. Whatever requested the exit will also
* have set something else (eg exit_request or
* interrupt_request) which we will handle
* next time around the loop. But we need to
* ensure the zeroing of tcg_exit_req (see cpu_tb_exec)
* comes before the next read of cpu->exit_request
* or cpu->interrupt_request.
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
* will also have set something else (eg interrupt_request)
* which we will handle next time around the loop. But we
* need to ensure the tcg_exit_req read in generated code
* comes before the next read of cpu->exit_request or
* cpu->interrupt_request.
*/
smp_mb();
*last_tb = NULL;
@ -608,13 +644,8 @@ int cpu_exec(CPUState *cpu)
return EXCP_HALTED;
}
atomic_mb_set(&tcg_current_cpu, cpu);
rcu_read_lock();
if (unlikely(atomic_mb_read(&exit_request))) {
cpu->exit_request = 1;
}
cc->cpu_exec_enter(cpu);
/* Calculate difference between guest clock and host clock.
@ -640,6 +671,9 @@ int cpu_exec(CPUState *cpu)
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
}
/* if an exception is pending, we execute it here */
@ -659,10 +693,5 @@ int cpu_exec(CPUState *cpu)
cc->cpu_exec_exit(cpu);
rcu_read_unlock();
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_cpu, NULL);
return ret;
}

345
cpus.c
View file

@ -25,6 +25,7 @@
/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
@ -45,6 +46,7 @@
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
@ -150,6 +152,77 @@ typedef struct TimersState {
} TimersState;
static TimersState timers_state;
bool mttcg_enabled;
/*
* We default to false if we know other options have been enabled
* which are currently incompatible with MTTCG. Otherwise when each
* guest (target) has been updated to support:
* - atomic instructions
* - memory ordering primitives (barriers)
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
*
* Once a guest architecture has been converted to the new primitives
* there are two remaining limitations to check.
*
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* - The host must have a stronger memory order than the guest
*
* It may be possible in future to support strong guests on weak hosts
* but that will require tagging all load/stores in a guest with their
* implicit memory order requirements which would likely slow things
* down a lot.
*/
/* Report whether the host's default memory ordering covers the guest's.
 *
 * Returns true only when both TCG_GUEST_DEFAULT_MO and
 * TCG_TARGET_DEFAULT_MO are defined and every bit set in the guest
 * mask is also set in the host mask.  If either macro is missing the
 * answer is conservatively false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    /* Any guest ordering bit the host lacks makes them incompatible */
    compatible = !(TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
#endif

    return compatible;
}
/* Decide whether MTTCG is on when the user gave no "thread" option.
 *
 * Returns false if the "icount" option group carries an "rr" value or
 * the guest is oversized for the host.  Otherwise the result is the
 * memory-order compatibility check, but only for targets built with
 * TARGET_SUPPORTS_MTTCG; all other targets get false.
 */
static bool default_mttcg_enabled(void)
{
    QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
    const char *rr = qemu_opt_get(icount_opts, "rr");

    if (rr || TCG_OVERSIZED_GUEST) {
        return false;
    }

#ifdef TARGET_SUPPORTS_MTTCG
    return check_tcg_memory_orders_compatible();
#else
    return false;
#endif
}
/* Apply the "thread" option of the TCG accelerator to mttcg_enabled.
 *
 * @opts: option group that may carry a "thread" value
 * @errp: set when "thread" has an unrecognised value, or when "multi"
 *        is requested while the guest is oversized for the host
 *
 * "multi" enables MTTCG (after warning if the guest expects a stronger
 * memory ordering than the host provides), "single" disables it, and
 * an absent option falls back to default_mttcg_enabled().
 */
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");

    if (!t) {
        mttcg_enabled = default_mttcg_enabled();
        return;
    }

    if (strcmp(t, "multi") == 0) {
        if (TCG_OVERSIZED_GUEST) {
            error_setg(errp, "No MTTCG when guest word size > hosts");
            return;
        }
        if (!check_tcg_memory_orders_compatible()) {
            error_report("Guest expects a stronger memory ordering "
                         "than the host provides");
            /* Unlike error_report(), error_printf() does not append a
             * newline, so the message has to carry its own.
             */
            error_printf("This may cause strange/hard to debug errors\n");
        }
        mttcg_enabled = true;
    } else if (strcmp(t, "single") == 0) {
        mttcg_enabled = false;
    } else {
        error_setg(errp, "Invalid 'thread' setting %s", t);
    }
}
int64_t cpu_get_icount_raw(void)
{
@ -694,6 +767,63 @@ void configure_icount(QemuOpts *opts, Error **errp)
NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
/* TCG vCPU kick timer
*
* The kick timer is responsible for moving single threaded vCPU
* emulation on to the next vCPU. If more than one vCPU is running a
* timer event will force a cpu->exit so the next vCPU can get
* scheduled.
*
* The timer is removed if all vCPUs are idle and restarted again once
* idleness is complete.
*/
static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
/* Deadline, on QEMU_CLOCK_VIRTUAL in nanoseconds, of the next kick:
 * the current virtual time plus one TCG_KICK_PERIOD.
 */
static inline int64_t qemu_tcg_next_kick(void)
{
    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

    return now + TCG_KICK_PERIOD;
}
/* Kick the currently round-robin scheduled vCPU
 *
 * Reads tcg_current_rr_cpu and, if it is non-NULL, calls cpu_exit() on
 * it.  The pointer is then read again and the whole step repeated
 * until two consecutive reads agree, so a vCPU that became current
 * while the previous one was being kicked is kicked as well.  When no
 * vCPU is current (pointer NULL on both reads) nothing is kicked.
 */
static void qemu_cpu_kick_rr_cpu(void)
{
CPUState *cpu;
do {
cpu = atomic_mb_read(&tcg_current_rr_cpu);
if (cpu) {
cpu_exit(cpu);
}
/* retry if the current vCPU changed underneath us */
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
/* Callback of tcg_kick_vcpu_timer (registered in start_tcg_kick_timer).
 *
 * Re-arms the timer for the next kick period and then kicks whichever
 * vCPU is currently scheduled by the round-robin thread.  @opaque is
 * unused.
 */
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
qemu_cpu_kick_rr_cpu();
}
/* Create and arm the kick timer if it is needed and not yet running.
 *
 * Nothing happens when MTTCG is enabled, when a timer already exists,
 * or when first_cpu has no successor (a single vCPU needs no kicking).
 */
static void start_tcg_kick_timer(void)
{
    if (mttcg_enabled || tcg_kick_vcpu_timer || !CPU_NEXT(first_cpu)) {
        return;
    }

    tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                       kick_tcg_thread, NULL);
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
}
/* Tear down the kick timer, if one exists.
 *
 * start_tcg_kick_timer() allocates a fresh timer with timer_new_ns()
 * every time it finds the pointer NULL, so the old timer has to be
 * released here; merely deleting it and clearing the pointer would
 * leak one QEMUTimer per idle/wake cycle.
 */
static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        /* take it off the active list before releasing the storage */
        timer_del(tcg_kick_vcpu_timer);
        timer_free(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;
static QemuThread io_thread;
@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void)
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
static void qemu_wait_io_event_common(CPUState *cpu)
{
atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
process_queued_cpu_work(cpu);
cpu->thread_kicked = false;
}
/* Should the TCG thread serving @cpu go to sleep?
 *
 * With MTTCG only @cpu's own idleness matters; in the shared-thread
 * case the thread may sleep only when every vCPU thread is idle.
 */
static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    return mttcg_enabled ? cpu_thread_is_idle(cpu) : all_cpu_threads_idle();
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
while (all_cpu_threads_idle()) {
while (qemu_tcg_should_sleep(cpu)) {
stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
while (iothread_requesting_mutex) {
qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
start_tcg_kick_timer();
CPU_FOREACH(cpu) {
qemu_wait_io_event_common(cpu);
}
qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
current_cpu = cpu;
while (1) {
current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
exit(1);
}
qemu_mutex_lock_iothread();
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void)
}
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
/* Single-threaded TCG
*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
* This is done explicitly rather than relying on side-effects
* elsewhere.
*/
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
CPU_FOREACH(cpu) {
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
/* process any pending work */
atomic_mb_set(&exit_request, 1);
start_tcg_kick_timer();
cpu = first_cpu;
/* process any pending work */
cpu->exit_request = 1;
while (1) {
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
cpu = first_cpu;
}
for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
atomic_mb_set(&tcg_current_rr_cpu, cpu);
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
break;
}
} else if (cpu->stop || cpu->stopped) {
} else if (cpu->stop) {
if (cpu->unplug) {
cpu = CPU_NEXT(cpu);
}
break;
}
} /* for cpu.. */
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
/* Pairs with smp_wmb in qemu_cpu_kick. */
atomic_mb_set(&exit_request, 0);
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_rr_cpu, NULL);
if (cpu && cpu->exit_request) {
atomic_mb_set(&cpu->exit_request, 0);
}
handle_icount_deadline();
qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}
@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused)
}
#endif
/* Multi-threaded TCG
*
* In the multi-threaded case each vCPU has its own thread. The TLS
* variable current_cpu can be used deep in the code to find the
* current CPUState for a given thread.
*/
/* Thread entry point for a single vCPU; @arg is that vCPU's CPUState.
 *
 * Set-up: registers the thread with RCU, takes the iothread lock,
 * records the thread identity in @cpu, marks the vCPU created and
 * signals qemu_cpu_cond.
 *
 * Main loop: when cpu_can_run() allows it, executes guest code through
 * tcg_cpu_exec() and reacts to the returned exception code, then
 * handles the icount deadline, clears cpu->exit_request and waits in
 * qemu_tcg_wait_io_event().  The loop has no exit, so the trailing
 * return statement is never reached.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->can_do_io = 1;
current_cpu = cpu;
qemu_cond_signal(&qemu_cpu_cond);
/* process any pending work */
cpu->exit_request = 1;
while (1) {
if (cpu_can_run(cpu)) {
int r;
r = tcg_cpu_exec(cpu);
switch (r) {
case EXCP_DEBUG:
cpu_handle_guest_debug(cpu);
break;
case EXCP_HALTED:
/* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
/* the atomic step is executed with the iothread lock released */
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
/* fall through: the default case only breaks out of the switch */
default:
/* Ignore everything else? */
break;
}
}
handle_icount_deadline();
atomic_mb_set(&cpu->exit_request, 0);
qemu_tcg_wait_io_event(cpu);
}
return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
#endif
}
static void qemu_cpu_kick_no_halt(void)
{
CPUState *cpu;
/* Ensure whatever caused the exit has reached the CPU threads before
* writing exit_request.
*/
atomic_mb_set(&exit_request, 1);
cpu = atomic_mb_read(&tcg_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
qemu_cpu_kick_no_halt();
cpu_exit(cpu);
/* NOP unless doing single-thread RR */
qemu_cpu_kick_rr_cpu();
} else {
if (hax_enabled()) {
/*
@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void)
void qemu_mutex_lock_iothread(void)
{
atomic_inc(&iothread_requesting_mutex);
/* In the simple case there is no need to bump the VCPU thread out of
* TCG code execution.
*/
if (!tcg_enabled() || qemu_in_vcpu_thread() ||
!first_cpu || !first_cpu->created) {
qemu_mutex_lock(&qemu_global_mutex);
atomic_dec(&iothread_requesting_mutex);
} else {
if (qemu_mutex_trylock(&qemu_global_mutex)) {
qemu_cpu_kick_no_halt();
qemu_mutex_lock(&qemu_global_mutex);
}
atomic_dec(&iothread_requesting_mutex);
qemu_cond_broadcast(&qemu_io_proceeded_cond);
}
g_assert(!qemu_mutex_iothread_locked());
qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
@ -1392,13 +1588,6 @@ void pause_all_vcpus(void)
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
if (!kvm_enabled()) {
CPU_FOREACH(cpu) {
cpu->stop = false;
cpu->stopped = true;
}
return;
}
}
while (!all_vcpus_paused()) {
@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu)
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
static QemuCond *tcg_halt_cond;
static QemuThread *tcg_cpu_thread;
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
tcg_halt_cond = cpu->halt_cond;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
if (qemu_tcg_mttcg_enabled()) {
/* create a thread per vCPU with TCG (MTTCG) */
parallel_cpus = true;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
} else {
/* share a single thread for all cpus with TCG */
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
qemu_thread_create(cpu->thread, thread_name,
qemu_tcg_rr_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
}
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
tcg_cpu_thread = cpu->thread;
} else {
cpu->thread = tcg_cpu_thread;
cpu->halt_cond = tcg_halt_cond;
/* For non-MTTCG cases we share the thread */
cpu->thread = single_tcg_cpu_thread;
cpu->halt_cond = single_tcg_halt_cond;
}
}

471
cputlb.c
View file

@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
@ -57,6 +58,40 @@
} \
} while (0)
/* Debug-only check, active when DEBUG_TLB_GATE is non-zero: the vCPU
 * passed in must either not be created yet or be the vCPU of the
 * calling thread.
 *
 * The body uses the macro argument itself rather than relying on the
 * caller happening to have a variable called "cpu" in scope.
 */
#define assert_cpu_is_self(this_cpu) do {                         \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(this_cpu)->created ||                      \
                     qemu_cpu_is_self(this_cpu));                 \
        }                                                         \
    } while (0)
/* run_on_cpu_data.target_ptr should always be big enough for a
* target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
*/
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
/* flush_all_helper: queue fn on every vCPU except src
 *
 * fn is scheduled with payload d on each other vCPU through
 * async_run_on_cpu().  The source vCPU is skipped on purpose: the
 * caller decides how fn is applied to it, either by calling it
 * directly or by queueing it with async_safe_run_on_cpu().
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *other;

    CPU_FOREACH(other) {
        if (other == src) {
            continue;
        }
        async_run_on_cpu(other, fn, d);
    }
}
/* statistics */
int tlb_flush_count;
@ -65,10 +100,22 @@ int tlb_flush_count;
* flushing more entries than required is only an efficiency issue,
* not a correctness issue.
*/
void tlb_flush(CPUState *cpu)
static void tlb_flush_nocheck(CPUState *cpu)
{
CPUArchState *env = cpu->env_ptr;
/* The QOM tests will trigger tlb_flushes without setting up TCG
* so we bug out here in that case.
*/
if (!tcg_enabled()) {
return;
}
assert_cpu_is_self(cpu);
tlb_debug("(count: %d)\n", tlb_flush_count++);
tb_lock();
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@ -76,39 +123,117 @@ void tlb_flush(CPUState *cpu)
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
tlb_flush_count++;
tb_unlock();
atomic_mb_set(&cpu->pending_tlb_flush, 0);
}
static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
/* run_on_cpu work item performing a full TLB flush of @cpu.
 * @data is unused; the signature matches the async work callbacks
 * passed to async_run_on_cpu() / async_safe_run_on_cpu().
 */
static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
{
tlb_flush_nocheck(cpu);
}
/* Flush the whole TLB of @cpu.
 *
 * When @cpu is not yet created, or is the calling thread's own vCPU,
 * the flush is done immediately.  Otherwise it is queued as async work
 * on @cpu, unless pending_tlb_flush already shows a full flush queued.
 */
void tlb_flush(CPUState *cpu)
{
    if (!cpu->created || qemu_cpu_is_self(cpu)) {
        tlb_flush_nocheck(cpu);
        return;
    }

    if (atomic_mb_read(&cpu->pending_tlb_flush) == ALL_MMUIDX_BITS) {
        /* a complete flush is already on its way */
        return;
    }

    atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
    async_run_on_cpu(cpu, tlb_flush_global_async_work, RUN_ON_CPU_NULL);
}
/* Full TLB flush on every vCPU: queued asynchronously on all vCPUs
 * other than @src_cpu, performed directly on @src_cpu.
 */
void tlb_flush_all_cpus(CPUState *src_cpu)
{
    flush_all_helper(src_cpu, tlb_flush_global_async_work, RUN_ON_CPU_NULL);
    tlb_flush_global_async_work(src_cpu, RUN_ON_CPU_NULL);
}
/* Full TLB flush on every vCPU: queued asynchronously on all vCPUs
 * other than @src_cpu, and queued on @src_cpu itself through
 * async_safe_run_on_cpu() rather than being run inline.
 */
void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    flush_all_helper(src_cpu, tlb_flush_global_async_work, RUN_ON_CPU_NULL);
    async_safe_run_on_cpu(src_cpu, tlb_flush_global_async_work,
                          RUN_ON_CPU_NULL);
}
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
unsigned long mmu_idx_bitmask = data.host_int;
int mmu_idx;
tlb_debug("start\n");
assert_cpu_is_self(cpu);
for (;;) {
int mmu_idx = va_arg(argp, int);
tb_lock();
if (mmu_idx < 0) {
break;
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
tlb_debug("%d\n", mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
tlb_debug("%d\n", mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
tlb_debug("done\n");
tb_unlock();
}
void tlb_flush_by_mmuidx(CPUState *cpu, ...)
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
va_list argp;
va_start(argp, cpu);
v_tlb_flush_by_mmuidx(cpu, argp);
va_end(argp);
tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
if (!qemu_cpu_is_self(cpu)) {
uint16_t pending_flushes = idxmap;
pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
if (pending_flushes) {
tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
atomic_or(&cpu->pending_tlb_flush, pending_flushes);
async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
RUN_ON_CPU_HOST_INT(pending_flushes));
}
} else {
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(idxmap));
}
}
/* Flush the MMU indexes selected by the @idxmap bitmap on every vCPU:
 * queued asynchronously on all vCPUs other than @src_cpu, performed
 * directly on @src_cpu.
 */
void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    run_on_cpu_data payload = RUN_ON_CPU_HOST_INT(idxmap);

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, tlb_flush_by_mmuidx_async_work, payload);
    tlb_flush_by_mmuidx_async_work(src_cpu, payload);
}
/* Flush the MMU indexes selected by the @idxmap bitmap on every vCPU:
 * queued asynchronously on all vCPUs other than @src_cpu, and queued
 * on @src_cpu itself through async_safe_run_on_cpu().
 */
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                         uint16_t idxmap)
{
    run_on_cpu_data payload = RUN_ON_CPU_HOST_INT(idxmap);

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, tlb_flush_by_mmuidx_async_work, payload);
    async_safe_run_on_cpu(src_cpu, tlb_flush_by_mmuidx_async_work, payload);
}
static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
{
if (addr == (tlb_entry->addr_read &
@ -121,12 +246,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
}
}
void tlb_flush_page(CPUState *cpu, target_ulong addr)
static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr = (target_ulong) data.target_ptr;
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
/* Check if we need to flush due to large pages. */
@ -156,15 +284,62 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
tb_flush_jmp_cache(cpu, addr);
}
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
/* Flush the TLB entries for the page containing @addr on @cpu.
 *
 * Runs the flush inline when called from @cpu's own thread, otherwise
 * queues it as async work on @cpu.
 */
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_debug("page :" TARGET_FMT_lx "\n", addr);

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
        return;
    }

    async_run_on_cpu(cpu, tlb_flush_page_async_work,
                     RUN_ON_CPU_TARGET_PTR(addr));
}
/* As we are going to hijack the bottom bits of the page address for a
* mmuidx bit mask we need to fail to build if we can't do that
*/
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
int i, k;
va_list argp;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
int mmu_idx;
int i;
va_start(argp, addr);
assert_cpu_is_self(cpu);
tlb_debug("addr "TARGET_FMT_lx"\n", addr);
tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
page, addr, mmu_idx_bitmap);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
/* check whether there are vltb entries that need to be flushed */
for (i = 0; i < CPU_VTLB_SIZE; i++) {
tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr);
}
}
}
tb_flush_jmp_cache(cpu, addr);
}
static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
@ -172,33 +347,80 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
v_tlb_flush_by_mmuidx(cpu, argp);
va_end(argp);
return;
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
} else {
tlb_flush_page_by_mmuidx_async_work(cpu, data);
}
}
addr &= TARGET_PAGE_MASK;
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
target_ulong addr_and_mmu_idx;
for (;;) {
int mmu_idx = va_arg(argp, int);
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
if (mmu_idx < 0) {
break;
}
/* This should already be page aligned */
addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
addr_and_mmu_idx |= idxmap;
tlb_debug("idx %d\n", mmu_idx);
tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
/* check whether there are vltb entries that need to be flushed */
for (k = 0; k < CPU_VTLB_SIZE; k++) {
tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
}
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
} else {
tlb_check_page_and_flush_by_mmuidx_async_work(
cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
va_end(argp);
}
tb_flush_jmp_cache(cpu, addr);
/* Flush one page for the MMU indexes in @idxmap on every vCPU.
 *
 * The page-aligned address and the index bitmap are packed into one
 * target_ulong (the bitmap occupies the low, in-page bits).  The work
 * is queued asynchronously on all vCPUs other than @src_cpu and run
 * directly on @src_cpu.
 */
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    target_ulong packed;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    packed = (addr & TARGET_PAGE_MASK) | idxmap;

    flush_all_helper(src_cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
                     RUN_ON_CPU_TARGET_PTR(packed));
    tlb_check_page_and_flush_by_mmuidx_async_work(
        src_cpu, RUN_ON_CPU_TARGET_PTR(packed));
}
/* Same as tlb_flush_page_by_mmuidx_all_cpus() except that the flush for
 * src_cpu itself is not run inline: it is queued with
 * async_safe_run_on_cpu() after the work has been passed to
 * flush_all_helper().
 */
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
    /* This should already be page aligned */
    target_ulong packed = (addr & TARGET_PAGE_MASK) | idxmap;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(packed));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(packed));
}
/* Flush the page at addr: the page flush work is passed to
 * flush_all_helper() and then invoked directly for src.
 */
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    flush_all_helper(src, tlb_flush_page_async_work,
                     RUN_ON_CPU_TARGET_PTR(addr));
    tlb_flush_page_async_work(src, RUN_ON_CPU_TARGET_PTR(addr));
}
/* Same as tlb_flush_page_all_cpus() except that the flush for src is
 * queued with async_safe_run_on_cpu() instead of being run inline.
 */
void tlb_flush_page_all_cpus_synced(CPUState *src,
                                    target_ulong addr)
{
    flush_all_helper(src, tlb_flush_page_async_work,
                     RUN_ON_CPU_TARGET_PTR(addr));
    async_safe_run_on_cpu(src, tlb_flush_page_async_work,
                          RUN_ON_CPU_TARGET_PTR(addr));
}
/* update the TLBs so that writes to code in the virtual page 'addr'
@ -216,36 +438,84 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
/* Return true when none of the TLB_INVALID_MASK, TLB_MMIO or
 * TLB_NOTDIRTY flags are set in the entry's addr_write field.
 */
static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
{
    return !(tlbe->addr_write
             & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY));
}
void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
/*
* Dirty write flag handling
*
* When the TCG code writes to a location it looks up the address in
* the TLB and uses that data to compute the final address. If any of
* the lower bits of the address are set then the slow path is forced.
* There are a number of reasons to do this but for normal RAM the
* most usual is detecting writes to code regions which may invalidate
* generated code.
*
* Because we want other vCPUs to respond to changes straight away we
* update the te->addr_write field atomically. If the TLB entry has
* been changed by the vCPU in the mean time we skip the update.
*
* As this function uses atomic accesses we also need to ensure
* updates to tlb_entries follow the same access rules. We don't need
* to worry about this for oversized guests as MTTCG is disabled for
* them.
*/
static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
uintptr_t length)
{
uintptr_t addr;
#if TCG_OVERSIZED_GUEST
uintptr_t addr = tlb_entry->addr_write;
if (tlb_is_dirty_ram(tlb_entry)) {
addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
addr &= TARGET_PAGE_MASK;
addr += tlb_entry->addend;
if ((addr - start) < length) {
tlb_entry->addr_write |= TLB_NOTDIRTY;
}
}
}
#else
/* paired with atomic_mb_set in tlb_set_page_with_attrs */
uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
uintptr_t addr = orig_addr;
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
ram_addr = qemu_ram_addr_from_host(ptr);
if (ram_addr == RAM_ADDR_INVALID) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
addr &= TARGET_PAGE_MASK;
addr += atomic_read(&tlb_entry->addend);
if ((addr - start) < length) {
uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
}
}
return ram_addr;
#endif
}
/* For atomic correctness when running MTTCG we need to use the right
 * primitives when copying entries.
 *
 * Copy the TLB entry *s into *d.
 *
 * When atomic_set is true, addend is stored with atomic_set() and
 * addr_write is stored last with atomic_mb_set(); addr_read and
 * addr_code are stored with plain assignments. When atomic_set is
 * false every field of *d is stored with a plain assignment. In both
 * cases addr_write and addend are loaded from *s with atomic_read().
 *
 * With TCG_OVERSIZED_GUEST the entry is copied with a plain struct
 * assignment and atomic_set is ignored.
 */
static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
bool atomic_set)
{
#if TCG_OVERSIZED_GUEST
*d = *s;
#else
if (atomic_set) {
d->addr_read = s->addr_read;
d->addr_code = s->addr_code;
atomic_set(&d->addend, atomic_read(&s->addend));
/* Pairs with flag setting in tlb_reset_dirty_range */
atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
} else {
/* Destination stores are plain; only the loads of the fields that
 * are accessed atomically elsewhere use atomic_read().
 */
d->addr_read = s->addr_read;
d->addr_write = atomic_read(&s->addr_write);
d->addr_code = s->addr_code;
d->addend = atomic_read(&s->addend);
}
#endif
}
/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
* the target vCPU). As such care needs to be taken that we don't
* dangerously race with another vCPU update. The only thing actually
* updated is the target TLB entry ->addr_write flags.
*/
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
CPUArchState *env;
@ -283,6 +553,8 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
int i;
int mmu_idx;
assert_cpu_is_self(cpu);
vaddr &= TARGET_PAGE_MASK;
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@ -337,11 +609,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
target_ulong address;
target_ulong code_address;
uintptr_t addend;
CPUTLBEntry *te;
CPUTLBEntry *te, *tv, tn;
hwaddr iotlb, xlat, sz;
unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
int asidx = cpu_asidx_from_attrs(cpu, attrs);
assert_cpu_is_self(cpu);
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
@ -371,41 +644,50 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
te = &env->tlb_table[mmu_idx][index];
/* do not discard the translation in te, evict it into a victim tlb */
env->tlb_v_table[mmu_idx][vidx] = *te;
tv = &env->tlb_v_table[mmu_idx][vidx];
/* addr_write can race with tlb_reset_dirty_range */
copy_tlb_helper(tv, te, true);
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
/* refill the tlb */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
env->iotlb[mmu_idx][index].attrs = attrs;
te->addend = addend - vaddr;
/* Now calculate the new entry */
tn.addend = addend - vaddr;
if (prot & PAGE_READ) {
te->addr_read = address;
tn.addr_read = address;
} else {
te->addr_read = -1;
tn.addr_read = -1;
}
if (prot & PAGE_EXEC) {
te->addr_code = code_address;
tn.addr_code = code_address;
} else {
te->addr_code = -1;
tn.addr_code = -1;
}
tn.addr_write = -1;
if (prot & PAGE_WRITE) {
if ((memory_region_is_ram(section->mr) && section->readonly)
|| memory_region_is_romd(section->mr)) {
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
tn.addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& cpu_physical_memory_is_clean(
memory_region_get_ram_addr(section->mr) + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
tn.addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
tn.addr_write = address;
}
} else {
te->addr_write = -1;
}
/* Pairs with flag setting in tlb_reset_dirty_range */
copy_tlb_helper(te, &tn, true);
/* atomic_mb_set(&te->addr_write, write_address); */
}
/* Add a new TLB entry, but without specifying the memory
@ -452,6 +734,18 @@ static void report_bad_exec(CPUState *cpu, target_ulong addr)
log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
}
/* Translate a host pointer into a ram_addr_t with
 * qemu_ram_addr_from_host(). If the lookup yields RAM_ADDR_INVALID the
 * failure is reported with error_report() and the process aborts, so
 * the caller only ever sees a valid address.
 */
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr = qemu_ram_addr_from_host(ptr);

    if (ram_addr != RAM_ADDR_INVALID) {
        return ram_addr;
    }

    error_report("Bad ram pointer %p", ptr);
    abort();
}
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
* is actually a ram_addr_t (in system mode; the user mode emulation
@ -495,6 +789,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
uint64_t val;
bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
@ -503,7 +798,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
cpu->mem_io_vaddr = addr;
if (mr->global_locking) {
qemu_mutex_lock_iothread();
locked = true;
}
memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
return val;
}
@ -514,15 +818,23 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
if (mr->global_locking) {
qemu_mutex_lock_iothread();
locked = true;
}
memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
}
/* Return true if ADDR is present in the victim tlb, and has been copied
@ -538,10 +850,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
copy_tlb_helper(&tmptlb, tlb, false);
copy_tlb_helper(tlb, vtlb, true);
copy_tlb_helper(vtlb, &tmptlb, true);
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
tmpio = *io; *io = *vio; *vio = tmpio;
return true;
}

350
docs/multi-thread-tcg.txt Normal file
View file

@ -0,0 +1,350 @@
Copyright (c) 2015-2016 Linaro Ltd.
This work is licensed under the terms of the GNU GPL, version 2 or
later. See the COPYING file in the top-level directory.
Introduction
============
This document outlines the design for multi-threaded TCG system-mode
emulation. The current user-mode emulation mirrors the thread
structure of the translated executable. Some of the work will be
applicable to both system and linux-user emulation.
The original system-mode TCG implementation was single threaded and
dealt with multiple CPUs with simple round-robin scheduling. This
simplified a lot of things but became increasingly limited as systems
being emulated gained additional cores and per-core performance gains
for host systems started to level off.
vCPU Scheduling
===============
We introduce a new running mode where each vCPU will run on its own
user-space thread. This will be enabled by default for all FE/BE
combinations that have had the required work done to support this
safely.
In the general case of running translated code there should be no
inter-vCPU dependencies and all vCPUs should be able to run at full
speed. Synchronisation will only be required while accessing internal
shared data structures or when the emulated architecture requires a
coherent representation of the emulated machine state.
Shared Data Structures
======================
Main Run Loop
-------------
Even when there is no code being generated there are a number of
structures associated with the hot-path through the main run-loop.
These are associated with looking up the next translation block to
execute. These include:
tb_jmp_cache (per-vCPU, cache of recent jumps)
tb_ctx.htable (global hash table, phys address->tb lookup)
As TB linking only occurs when blocks are in the same page this code
is critical to performance as looking up the next TB to execute is the
most common reason to exit the generated code.
DESIGN REQUIREMENT: Make access to lookup structures safe with
multiple reader/writer threads. Minimise any lock contention to do it.
The hot-path avoids using locks where possible. The tb_jmp_cache is
updated with atomic accesses to ensure consistent results. The fall
back QHT based hash table is also designed for lockless lookups. Locks
are only taken when code generation is required or TranslationBlocks
have their block-to-block jumps patched.
Global TCG State
----------------
We need to protect the entire code generation cycle including any post
generation patching of the translated code. This also implies a shared
translation buffer which contains code running on all cores. Any
execution path that comes to the main run loop will need to hold a
mutex for code generation. This also includes times when we need to flush
code or entries from any shared lookups/caches. Structures held on a
per-vCPU basis won't need locking unless other vCPUs will need to
modify them.
DESIGN REQUIREMENT: Add locking around all code generation and TB
patching.
(Current solution)
Mainly as part of the linux-user work all code generation is
serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the
place of mmap_lock() in linux-user.
Translation Blocks
------------------
Currently the whole system shares a single code generation buffer
which when full will force a flush of all translations and start from
scratch again. Some operations also force a full flush of translations
including:
- debugging operations (breakpoint insertion/removal)
- some CPU helper functions
This is done with the async_safe_run_on_cpu() mechanism to ensure all
vCPUs are quiescent when changes are being made to shared global
structures.
More granular translation invalidation events are typically due
to a change of the state of a physical page:
- code modification (self-modifying code, patching code)
- page changes (new page mapping in linux-user mode)
While setting the invalid flag in a TranslationBlock will stop it
being used when looked up in the hot-path there are a number of other
book-keeping structures that need to be safely cleared.
Any TranslationBlocks which have been patched to jump directly to the
now invalid blocks need the jump patches reversing so they will return
to the C code.
There are a number of look-up caches that need to be properly updated
including the:
- jump lookup cache
- the physical-to-tb lookup hash table
- the global page table
The global page table (l1_map) provides a multi-level look-up for
PageDesc structures, which contain pointers to the start of a
linked list of all Translation Blocks in that page (see page_next).
Both the jump patching and the page cache involve linked lists that
the invalidated TranslationBlock needs to be removed from.
DESIGN REQUIREMENT: Safely handle invalidation of TBs
- safely patch/revert direct jumps
- remove central PageDesc lookup entries
- ensure lookup caches/hashes are safely updated
(Current solution)
The direct jumps themselves are updated atomically by the TCG
tb_set_jmp_target() code. Modifications to the linked lists that allow
searching for linked pages are done under the protection of the
tb_lock().
The global page table is protected by the tb_lock() in system-mode and
mmap_lock() in linux-user mode.
The lookup caches are updated atomically and the lookup hash uses QHT
which is designed for concurrent safe lookup.
Memory maps and TLBs
--------------------
The memory handling code is fairly critical to the speed of memory
access in the emulated system. The SoftMMU code is designed so the
hot-path can be handled entirely within translated code. This is
handled with a per-vCPU TLB structure which once populated will allow
a series of accesses to the page to occur without exiting the
translated code. It is possible to set flags in the TLB address which
will ensure the slow-path is taken for each access. This can be done
to support:
- Memory regions (dividing up access to PIO, MMIO and RAM)
- Dirty page tracking (for code gen, SMC detection, migration and display)
- Virtual TLB (for translating guest address->real address)
When the TLB tables are updated by a vCPU thread other than their own
we need to ensure it is done in a safe way so no inconsistent state is
seen by the vCPU thread.
Some operations require updating a number of vCPUs TLBs at the same
time in a synchronised manner.
DESIGN REQUIREMENTS:
- TLB Flush All/Page
- can be across-vCPUs
- cross vCPU TLB flush may need other vCPU brought to halt
- change may need to be visible to the calling vCPU immediately
- TLB Flag Update
- usually cross-vCPU
- want change to be visible as soon as possible
- TLB Update (update a CPUTLBEntry, via tlb_set_page_with_attrs)
- This is a per-vCPU table - by definition can't race
- updated by its own thread when the slow-path is forced
(Current solution)
We have updated cputlb.c to defer cross-vCPU operations with
async_run_on_cpu(), which ensures each vCPU sees a coherent state
when it next runs its work (in a few instructions time).
A new set of operations (tlb_flush_*_all_cpus) take an additional flag
which when set will force synchronisation by setting the source vCPU's
work as "safe work" and exiting the cpu run loop. This ensures that by
the time execution restarts all flush operations have completed.
TLB flag updates are all done atomically and are also protected by the
tb_lock() which is used by the functions that update the TLB in bulk.
(Known limitation)
Not really a limitation but the wait mechanism is overly strict for
some architectures which only need flushes completed by a barrier
instruction. This could be a future optimisation.
Emulated hardware state
-----------------------
Currently thanks to KVM work any access to IO memory is automatically
protected by the global iothread mutex, also known as the BQL (Big
Qemu Lock). Any IO region that doesn't use global mutex is expected to
do its own locking.
However IO memory isn't the only way emulated hardware state can be
modified. Some architectures have model specific registers that
trigger hardware emulation features. Generally any translation helper
that needs to update more than a single vCPU's state should take the
BQL.
As the BQL, or global iothread mutex is shared across the system we
push the use of the lock as far down into the TCG code as possible to
minimise contention.
(Current solution)
MMIO access automatically serialises hardware emulation by way of the
BQL. Currently ARM targets serialise all ARM_CP_IO register accesses
and also defer the reset/startup of vCPUs to the vCPU context by way
of async_run_on_cpu().
Updates to interrupt state are also protected by the BQL as they can
often be cross vCPU.
Memory Consistency
==================
Between emulated guests and host systems there are a range of memory
consistency models. Even emulating weakly ordered systems on strongly
ordered hosts needs to ensure things like store-after-load re-ordering
can be prevented when the guest wants to.
Memory Barriers
---------------
Barriers (sometimes known as fences) provide a mechanism for software
to enforce a particular ordering of memory operations from the point
of view of external observers (e.g. another processor core). They can
apply to any memory operations as well as just loads or stores.
The Linux kernel has an excellent write-up on the various forms of
memory barrier and the guarantees they can provide [1].
Barriers are often wrapped around synchronisation primitives to
provide explicit memory ordering semantics. However they can be used
by themselves to provide safe lockless access by ensuring for example
a change to a signal flag will only be visible once the changes to
payload are.
DESIGN REQUIREMENT: Add a new tcg_memory_barrier op
This would enforce a strong load/store ordering so all loads/stores
complete at the memory barrier. On single-core non-SMP strongly
ordered backends this could become a NOP.
Aside from explicit standalone memory barrier instructions there are
also implicit memory ordering semantics which comes with each guest
memory access instruction. For example all x86 load/stores come with
fairly strong guarantees of sequential consistency whereas ARM has
special variants of load/store instructions that imply acquire/release
semantics.
In the case of a strongly ordered guest architecture being emulated on
a weakly ordered host the scope for a heavy performance impact is
quite high.
DESIGN REQUIREMENTS: Be efficient with use of memory barriers
- host systems with stronger implied guarantees can skip some barriers
- merge consecutive barriers to the strongest one
(Current solution)
The system currently has a tcg_gen_mb() which will add memory barrier
operations if code generation is being done in a parallel context. The
tcg_optimize() function attempts to merge barriers up to their
strongest form before any load/store operations. The solution was
originally developed and tested for linux-user based systems. All
backends have been converted to emit fences when required. So far the
following front-ends have been updated to emit fences when required:
- target-i386
- target-arm
- target-aarch64
- target-alpha
- target-mips
Memory Control and Maintenance
------------------------------
This includes a class of instructions for controlling system cache
behaviour. While QEMU doesn't model cache behaviour these instructions
are often seen when code modification has taken place to ensure the
changes take effect.
Synchronisation Primitives
--------------------------
There are two broad types of synchronisation primitives found in
modern ISAs: atomic instructions and exclusive regions.
The first type offer a simple atomic instruction which will guarantee
some sort of test and conditional store will be truly atomic w.r.t.
other cores sharing access to the memory. The classic example is the
x86 cmpxchg instruction.
The second type offer a pair of load/store instructions which offer a
guarantee that a region of memory has not been touched between the
load and store instructions. An example of this is ARM's ldrex/strex
pair where the strex instruction will return a flag indicating a
successful store only if no other CPU has accessed the memory region
since the ldrex.
Traditionally TCG has generated a series of operations that work
because they are within the context of a single translation block so
will have completed before another CPU is scheduled. However with
the ability to have multiple threads running to emulate multiple CPUs
we will need to explicitly expose these semantics.
DESIGN REQUIREMENTS:
- Support classic atomic instructions
- Support load/store exclusive (or load link/store conditional) pairs
- Generic enough infrastructure to support all guest architectures
CURRENT OPEN QUESTIONS:
- How problematic is the ABA problem in general?
(Current solution)
The TCG provides a number of atomic helpers (tcg_gen_atomic_*) which
can be used directly or combined to emulate other instructions like
ARM's ldrex/strex instructions. While they are susceptible to the ABA
problem so far common guests have not implemented patterns where
this may be a problem - typically presenting a locking ABI which
assumes cmpxchg like semantics.
The code also includes a fall-back for cases where multi-threaded TCG
ops can't work (e.g. guest atomic width > host atomic width). In this
case an EXCP_ATOMIC exit occurs and the instruction is emulated with
an exclusive lock which ensures all emulation is serialised.
While the atomic helpers look good enough for now there may be a need
to look at solutions that can more closely model the guest
architectures semantics.
==========
[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/Documentation/memory-barriers.txt

12
exec.c
View file

@ -2134,9 +2134,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
}
cpu->watchpoint_hit = wp;
/* The tb_lock will be reset when cpu_loop_exit or
* cpu_loop_exit_noexc longjmp back into the cpu_exec
* main loop.
/* Both tb_lock and iothread_mutex will be reset when
* cpu_loop_exit or cpu_loop_exit_noexc longjmp
* back into the cpu_exec main loop.
*/
tb_lock();
tb_check_watchpoint(cpu);
@ -2371,8 +2371,14 @@ static void io_mem_init(void)
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
/* io_mem_notdirty calls tb_invalidate_phys_page_fast,
* which can be called without the iothread mutex.
*/
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
memory_region_clear_global_locking(&io_mem_notdirty);
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
}

View file

@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu-common.h"
#include "hw/irq.h"
#include "qom/object.h"

View file

@ -457,8 +457,8 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
resume_all_vcpus();
if (!kvm_enabled()) {
/* tb_lock will be reset when cpu_loop_exit_noexc longjmps
* back into the cpu_exec loop. */
/* Both tb_lock and iothread_mutex will be reset when
* cpu_loop_exit_noexc longjmps back into the cpu_exec loop. */
tb_lock();
tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
cpu_loop_exit_noexc(cs);

View file

@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "gicv3_internal.h"
#include "cpu.h"
@ -733,6 +734,8 @@ void gicv3_cpuif_update(GICv3CPUState *cs)
ARMCPU *cpu = ARM_CPU(cs->cpu);
CPUARMState *env = &cpu->env;
g_assert(qemu_mutex_iothread_locked());
trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq,
cs->hppi.grp, cs->hppi.prio);

View file

@ -14,6 +14,7 @@
#include "qemu/bitops.h"
#include "qemu/log.h"
#include "arm-powerctl.h"
#include "qom/cpu.h"
#ifndef DEBUG_IMX6_SRC
#define DEBUG_IMX6_SRC 0
@ -113,6 +114,45 @@ static uint64_t imx6_src_read(void *opaque, hwaddr offset, unsigned size)
return value;
}
/* The reset is asynchronous so we need to defer clearing the reset
 * bit until the work is completed.
 *
 * This structure carries the arguments of one deferred clear from
 * imx6_defer_clear_reset_bit(), which allocates it, to
 * imx6_clear_reset_bit(), which frees it.
 */
struct SRCSCRResetInfo {
/* device state whose SRC_SCR register is updated */
IMX6SRCState *s;
/* position of the bit to clear in SRC_SCR */
int reset_bit;
};
/* Work function with the run_on_cpu signature: clear one bit of the
 * SRC_SCR register and release the request that described it.
 *
 * @cpu:  unused here
 * @data: host_ptr points at a struct SRCSCRResetInfo, which is freed
 *        before returning
 *
 * The iothread mutex must be held by the caller (asserted).
 */
static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data)
{
    struct SRCSCRResetInfo *info = data.host_ptr;
    IMX6SRCState *src = info->s;
    uint32_t scr;

    assert(qemu_mutex_iothread_locked());

    /* Write a zero into the single-bit field at reset_bit */
    scr = deposit32(src->regs[SRC_SCR], info->reset_bit, 1, 0);
    src->regs[SRC_SCR] = scr;
    DPRINTF("reg[%s] <= 0x%" PRIx32 "\n",
            imx6_src_reg_name(SRC_SCR), src->regs[SRC_SCR]);

    g_free(info);
}
/* Queue the clearing of a reset bit in SRC_SCR as asynchronous work on
 * the CPU identified by cpuid.
 *
 * @cpuid:       id passed to arm_get_cpu_by_id() to find the target CPU
 * @s:           device state whose SRC_SCR register will be updated
 * @reset_shift: position of the bit to clear in SRC_SCR
 *
 * The request is heap allocated here and freed by
 * imx6_clear_reset_bit() once the work has run. If no CPU matches
 * cpuid nothing is queued and nothing is allocated.
 */
static void imx6_defer_clear_reset_bit(int cpuid,
                                       IMX6SRCState *s,
                                       unsigned long reset_shift)
{
    struct SRCSCRResetInfo *ri;
    CPUState *cpu = arm_get_cpu_by_id(cpuid);

    /* Without a target CPU there is nothing to run the work on;
     * passing NULL to async_run_on_cpu() would crash.
     */
    if (!cpu) {
        return;
    }

    ri = g_new(struct SRCSCRResetInfo, 1);
    ri->s = s;
    ri->reset_bit = reset_shift;

    async_run_on_cpu(cpu, imx6_clear_reset_bit, RUN_ON_CPU_HOST_PTR(ri));
}
static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
@ -153,7 +193,7 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
arm_set_cpu_off(3);
}
/* We clear the reset bits as the processor changed state */
clear_bit(CORE3_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT);
clear_bit(CORE3_RST_SHIFT, &change_mask);
}
if (EXTRACT(change_mask, CORE2_ENABLE)) {
@ -162,11 +202,11 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
arm_set_cpu_on(2, s->regs[SRC_GPR5], s->regs[SRC_GPR6],
3, false);
} else {
/* CORE 3 is shut down */
/* CORE 2 is shut down */
arm_set_cpu_off(2);
}
/* We clear the reset bits as the processor changed state */
clear_bit(CORE2_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT);
clear_bit(CORE2_RST_SHIFT, &change_mask);
}
if (EXTRACT(change_mask, CORE1_ENABLE)) {
@ -175,28 +215,28 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
arm_set_cpu_on(1, s->regs[SRC_GPR3], s->regs[SRC_GPR4],
3, false);
} else {
/* CORE 3 is shut down */
/* CORE 1 is shut down */
arm_set_cpu_off(1);
}
/* We clear the reset bits as the processor changed state */
clear_bit(CORE1_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT);
clear_bit(CORE1_RST_SHIFT, &change_mask);
}
if (EXTRACT(change_mask, CORE0_RST)) {
arm_reset_cpu(0);
clear_bit(CORE0_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(0, s, CORE0_RST_SHIFT);
}
if (EXTRACT(change_mask, CORE1_RST)) {
arm_reset_cpu(1);
clear_bit(CORE1_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT);
}
if (EXTRACT(change_mask, CORE2_RST)) {
arm_reset_cpu(2);
clear_bit(CORE2_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT);
}
if (EXTRACT(change_mask, CORE3_RST)) {
arm_reset_cpu(3);
clear_bit(CORE3_RST_SHIFT, &current_value);
imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT);
}
if (EXTRACT(change_mask, SW_IPU2_RST)) {
/* We pretend the IPU2 is reset */

View file

@ -62,7 +62,16 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
unsigned int old_pending = env->pending_interrupts;
unsigned int old_pending;
bool locked = false;
/* We may already have the BQL if coming from the reset path */
if (!qemu_mutex_iothread_locked()) {
locked = true;
qemu_mutex_lock_iothread();
}
old_pending = env->pending_interrupts;
if (level) {
env->pending_interrupts |= 1 << n_IRQ;
@ -80,9 +89,14 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
#endif
}
LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32
"req %08x\n", __func__, env, n_IRQ, level,
env->pending_interrupts, CPU(cpu)->interrupt_request);
if (locked) {
qemu_mutex_unlock_iothread();
}
}
/* PowerPC 6xx / 7xx internal IRQ controller */

View file

@ -1010,6 +1010,9 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
{
CPUPPCState *env = &cpu->env;
/* The TCG path should also be holding the BQL at this point */
g_assert(qemu_mutex_iothread_locked());
if (msr_pr) {
hcall_dprintf("Hypercall made with MSR[PR]=1\n");
env->gpr[3] = H_PRIVILEGE;

View file

@ -23,8 +23,6 @@
/* cputlb.c */
void tlb_protect_code(ram_addr_t ram_addr);
void tlb_unprotect_code(ram_addr_t ram_addr);
void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
uintptr_t length);
extern int tlb_flush_count;
#endif

View file

@ -92,6 +92,27 @@ void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx);
* MMU indexes.
*/
void tlb_flush_page(CPUState *cpu, target_ulong addr);
/**
* tlb_flush_page_all_cpus:
* @cpu: src CPU of the flush
* @addr: virtual address of page to be flushed
*
* Flush one page from the TLB of all CPUs, for all
* MMU indexes.
*/
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
/**
* tlb_flush_page_all_cpus_synced:
* @cpu: src CPU of the flush
* @addr: virtual address of page to be flushed
*
* Flush one page from the TLB of all CPUs, for all MMU
* indexes like tlb_flush_page_all_cpus except the source vCPUs work
* is scheduled as safe work meaning all flushes will be complete once
* the source vCPUs safe work is complete. This will depend on when
* the guests translation ends the TB.
*/
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr);
/**
* tlb_flush:
* @cpu: CPU whose TLB should be flushed
@ -102,25 +123,88 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr);
* use one of the other functions for efficiency.
*/
void tlb_flush(CPUState *cpu);
/**
* tlb_flush_all_cpus:
* @cpu: src CPU of the flush
*/
void tlb_flush_all_cpus(CPUState *src_cpu);
/**
* tlb_flush_all_cpus_synced:
* @cpu: src CPU of the flush
*
* Like tlb_flush_all_cpus except the source vCPUs work is
* scheduled as safe work meaning all flushes will be complete once
* the source vCPUs safe work is complete. This will depend on when
* the guests translation ends the TB.
*/
void tlb_flush_all_cpus_synced(CPUState *src_cpu);
/**
* tlb_flush_page_by_mmuidx:
* @cpu: CPU whose TLB should be flushed
* @addr: virtual address of page to be flushed
* @...: list of MMU indexes to flush, terminated by a negative value
* @idxmap: bitmap of MMU indexes to flush
*
* Flush one page from the TLB of the specified CPU, for the specified
* MMU indexes.
*/
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...);
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
uint16_t idxmap);
/**
* tlb_flush_page_by_mmuidx_all_cpus:
* @cpu: Originating CPU of the flush
* @addr: virtual address of page to be flushed
* @idxmap: bitmap of MMU indexes to flush
*
* Flush one page from the TLB of all CPUs, for the specified
* MMU indexes.
*/
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
uint16_t idxmap);
/**
* tlb_flush_page_by_mmuidx_all_cpus_synced:
* @cpu: Originating CPU of the flush
* @addr: virtual address of page to be flushed
* @idxmap: bitmap of MMU indexes to flush
*
* Flush one page from the TLB of all CPUs, for the specified MMU
* indexes like tlb_flush_page_by_mmuidx_all_cpus except the source
* vCPUs work is scheduled as safe work meaning all flushes will be
* complete once the source vCPUs safe work is complete. This will
* depend on when the guests translation ends the TB.
*/
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
uint16_t idxmap);
/**
* tlb_flush_by_mmuidx:
* @cpu: CPU whose TLB should be flushed
* @...: list of MMU indexes to flush, terminated by a negative value
* @wait: If true ensure synchronisation by exiting the cpu_loop
* @idxmap: bitmap of MMU indexes to flush
*
* Flush all entries from the TLB of the specified CPU, for the specified
* MMU indexes.
*/
void tlb_flush_by_mmuidx(CPUState *cpu, ...);
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap);
/**
* tlb_flush_by_mmuidx_all_cpus:
* @cpu: Originating CPU of the flush
* @idxmap: bitmap of MMU indexes to flush
*
* Flush all entries from all TLBs of all CPUs, for the specified
* MMU indexes.
*/
void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap);
/**
* tlb_flush_by_mmuidx_all_cpus_synced:
* @cpu: Originating CPU of the flush
* @idxmap: bitmap of MMU indexes to flush
*
* Flush all entries from all TLBs of all CPUs, for the specified
* MMU indexes, like tlb_flush_by_mmuidx_all_cpus except that the source
* vCPU's work is scheduled as safe work, meaning all flushes will be
* complete once the source vCPU's safe work is complete. This will
* depend on when the guest's translation ends the TB.
*/
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
/**
* tlb_set_page_with_attrs:
* @cpu: CPU to add this TLB entry for
@ -162,17 +246,45 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
}
static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
}
static inline void tlb_flush_page_all_cpus_synced(CPUState *src,
target_ulong addr)
{
}
static inline void tlb_flush(CPUState *cpu)
{
}
static inline void tlb_flush_all_cpus(CPUState *src_cpu)
{
}
static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
}
static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
target_ulong addr, ...)
target_ulong addr, uint16_t idxmap)
{
}
static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
}
static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu,
target_ulong addr,
uint16_t idxmap)
{
}
static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
target_ulong addr,
uint16_t idxmap)
{
}
static inline void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap)
{
}
static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
uint16_t idxmap)
{
}
#endif
@ -404,8 +516,4 @@ bool memory_region_is_unassigned(MemoryRegion *mr);
/* vl.c */
extern int singlestep;
/* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
extern CPUState *tcg_current_cpu;
extern bool exit_request;
#endif

View file

@ -329,6 +329,7 @@ struct CPUState {
bool unplug;
bool crash_occurred;
bool exit_request;
/* updates protected by BQL */
uint32_t interrupt_request;
int singlestep_enabled;
int64_t icount_extra;
@ -401,6 +402,12 @@ struct CPUState {
bool hax_vcpu_dirty;
struct hax_vcpu_state *hax_vcpu;
/* The pending_tlb_flush flag is set and cleared atomically to
* avoid potential races. The aim of the flag is to avoid
* unnecessary flushes.
*/
uint16_t pending_tlb_flush;
};
QTAILQ_HEAD(CPUTailQ, CPUState);
@ -415,6 +422,15 @@ extern struct CPUTailQ cpus;
extern __thread CPUState *current_cpu;
/**
* qemu_tcg_mttcg_enabled:
* Check whether we are running MultiThread TCG or not.
*
* Returns: %true if we are in MTTCG mode, %false otherwise.
*/
extern bool mttcg_enabled;
#define qemu_tcg_mttcg_enabled() (mttcg_enabled)
/**
* cpu_paging_enabled:
* @cpu: The CPU whose state is to be inspected.

View file

@ -36,4 +36,6 @@ extern int smp_threads;
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg);
void qemu_tcg_configure(QemuOpts *opts, Error **errp);
#endif

View file

@ -917,6 +917,8 @@ void memory_region_transaction_commit(void)
AddressSpace *as;
assert(memory_region_transaction_depth);
assert(qemu_mutex_iothread_locked());
--memory_region_transaction_depth;
if (!memory_region_transaction_depth) {
if (memory_region_update_pending) {

View file

@ -95,6 +95,26 @@ STEXI
Select CPU model (@code{-cpu help} for list and additional feature selection)
ETEXI
DEF("accel", HAS_ARG, QEMU_OPTION_accel,
"-accel [accel=]accelerator[,thread=single|multi]\n"
" select accelerator ('-accel help for list')\n"
" thread=single|multi (enable multi-threaded TCG)", QEMU_ARCH_ALL)
STEXI
@item -accel @var{name}[,prop=@var{value}[,...]]
@findex -accel
This is used to enable an accelerator. Depending on the target architecture,
kvm, xen, or tcg can be available. By default, tcg is used. If there is more
than one accelerator specified, the next one is used if the previous one fails
to initialize.
@table @option
@item thread=single|multi
Controls number of TCG threads. When the TCG is multi-threaded there will be one
thread per vCPU, therefore taking advantage of additional host cores. The default
is to enable multi-threading where both the back-end and front-ends support it and
no incompatible TCG features have been enabled (e.g. icount/replay).
@end table
ETEXI
DEF("smp", HAS_ARG, QEMU_OPTION_smp,
"-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets]\n"
" set the number of CPUs to 'n' [default=1]\n"

View file

@ -113,9 +113,19 @@ static void cpu_common_get_memory_mapping(CPUState *cpu,
error_setg(errp, "Obtaining memory mappings is unsupported on this CPU.");
}
/*
 * Clear the bits in @mask from @cpu's interrupt_request.
 *
 * This is reached from all over the code base, so the BQL is taken
 * here only when the caller does not already hold it, and released
 * again afterwards. cpu_interrupt, by contrast, assumes it is held.
 */
void cpu_reset_interrupt(CPUState *cpu, int mask)
{
    const bool bql_held = qemu_mutex_iothread_locked();

    if (!bql_held) {
        qemu_mutex_lock_iothread();
    }

    cpu->interrupt_request = cpu->interrupt_request & ~mask;

    if (!bql_held) {
        qemu_mutex_unlock_iothread();
    }
}
void cpu_exit(CPUState *cpu)

View file

@ -14,6 +14,7 @@
#include "internals.h"
#include "arm-powerctl.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "exec/exec-all.h"
#ifndef DEBUG_ARM_POWERCTL
@ -48,11 +49,93 @@ CPUState *arm_get_cpu_by_id(uint64_t id)
return NULL;
}
/*
* Parameters of a CPU power-on request. arm_set_cpu_on() allocates and
* fills one of these; arm_set_cpu_on_async_work() consumes and frees it.
*/
struct CpuOnInfo {
/* Address the CPU starts at; for AArch32, bit 0 selects Thumb state */
uint64_t entry;
/* Value loaded into x0 (AArch64) or r0 (AArch32) of the started CPU */
uint64_t context_id;
/* Exception level the CPU is started in */
uint32_t target_el;
/* True to start in AArch64 state, false to start in AArch32 */
bool target_aa64;
};
/*
* Complete a CPU power-on; queued as async work by arm_set_cpu_on().
* @target_cpu_state: the CPU being started
* @data: host_ptr carries a heap-allocated struct CpuOnInfo, which is
*        freed here once it has been consumed
*
* Resets the CPU, clears its halted flag, selects the requested execution
* state and exception level, loads the context ID and entry point, and
* finally marks the CPU PSCI_ON. The BQL is asserted to be held when the
* power state is written.
*/
static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
run_on_cpu_data data)
{
ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
struct CpuOnInfo *info = (struct CpuOnInfo *) data.host_ptr;
/* Initialize the cpu we are turning on */
cpu_reset(target_cpu_state);
target_cpu_state->halted = 0;
if (info->target_aa64) {
if ((info->target_el < 3) && arm_feature(&target_cpu->env,
ARM_FEATURE_EL3)) {
/*
* As target mode is AArch64, we need to set lower
* exception level (the requested level 2) to AArch64
*/
target_cpu->env.cp15.scr_el3 |= SCR_RW;
}
if ((info->target_el < 2) && arm_feature(&target_cpu->env,
ARM_FEATURE_EL2)) {
/*
* As target mode is AArch64, we need to set lower
* exception level (the requested level 1) to AArch64
*/
target_cpu->env.cp15.hcr_el2 |= HCR_RW;
}
target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true);
} else {
/* We are requested to boot in AArch32 mode */
/* CPSR mode to enter for each target EL; indexed by info->target_el */
static const uint32_t mode_for_el[] = { 0,
ARM_CPU_MODE_SVC,
ARM_CPU_MODE_HYP,
ARM_CPU_MODE_SVC };
cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M,
CPSRWriteRaw);
}
if (info->target_el == 3) {
/* Processor is in secure mode */
target_cpu->env.cp15.scr_el3 &= ~SCR_NS;
} else {
/* Processor is not in secure mode */
target_cpu->env.cp15.scr_el3 |= SCR_NS;
}
/* We check if the started CPU is now at the correct level */
assert(info->target_el == arm_current_el(&target_cpu->env));
if (info->target_aa64) {
target_cpu->env.xregs[0] = info->context_id;
target_cpu->env.thumb = false;
} else {
target_cpu->env.regs[0] = info->context_id;
target_cpu->env.thumb = info->entry & 1;
/* Bit 0 only selected Thumb state; strip it before it becomes the PC */
info->entry &= 0xfffffffe;
}
/* Start the new CPU at the requested address */
cpu_set_pc(target_cpu_state, info->entry);
/* The request was allocated by the caller; this worker owns and frees it */
g_free(info);
/* Finally set the power status */
assert(qemu_mutex_iothread_locked());
target_cpu->power_state = PSCI_ON;
}
int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
uint32_t target_el, bool target_aa64)
{
CPUState *target_cpu_state;
ARMCPU *target_cpu;
struct CpuOnInfo *info;
assert(qemu_mutex_iothread_locked());
DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64
"\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry,
@ -77,7 +160,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
}
target_cpu = ARM_CPU(target_cpu_state);
if (!target_cpu->powered_off) {
if (target_cpu->power_state == PSCI_ON) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is already on\n",
__func__, cpuid);
@ -109,74 +192,54 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
return QEMU_ARM_POWERCTL_INVALID_PARAM;
}
/* Initialize the cpu we are turning on */
cpu_reset(target_cpu_state);
target_cpu->powered_off = false;
target_cpu_state->halted = 0;
if (target_aa64) {
if ((target_el < 3) && arm_feature(&target_cpu->env, ARM_FEATURE_EL3)) {
/*
* As target mode is AArch64, we need to set lower
* exception level (the requested level 2) to AArch64
*/
target_cpu->env.cp15.scr_el3 |= SCR_RW;
}
if ((target_el < 2) && arm_feature(&target_cpu->env, ARM_FEATURE_EL2)) {
/*
* As target mode is AArch64, we need to set lower
* exception level (the requested level 1) to AArch64
*/
target_cpu->env.cp15.hcr_el2 |= HCR_RW;
}
target_cpu->env.pstate = aarch64_pstate_mode(target_el, true);
} else {
/* We are requested to boot in AArch32 mode */
static uint32_t mode_for_el[] = { 0,
ARM_CPU_MODE_SVC,
ARM_CPU_MODE_HYP,
ARM_CPU_MODE_SVC };
cpsr_write(&target_cpu->env, mode_for_el[target_el], CPSR_M,
CPSRWriteRaw);
/*
* If another CPU has powered the target on we are in the state
* ON_PENDING and additional attempts to power on the CPU should
* fail (see 6.6 Implementation CPU_ON/CPU_OFF races in the PSCI
* spec)
*/
if (target_cpu->power_state == PSCI_ON_PENDING) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is already powering on\n",
__func__, cpuid);
return QEMU_ARM_POWERCTL_ON_PENDING;
}
if (target_el == 3) {
/* Processor is in secure mode */
target_cpu->env.cp15.scr_el3 &= ~SCR_NS;
} else {
/* Processor is not in secure mode */
target_cpu->env.cp15.scr_el3 |= SCR_NS;
}
/* To avoid racing with a CPU we are just kicking off we do the
* final bit of preparation for the work in the target CPUs
* context.
*/
info = g_new(struct CpuOnInfo, 1);
info->entry = entry;
info->context_id = context_id;
info->target_el = target_el;
info->target_aa64 = target_aa64;
/* We check if the started CPU is now at the correct level */
assert(target_el == arm_current_el(&target_cpu->env));
if (target_aa64) {
target_cpu->env.xregs[0] = context_id;
target_cpu->env.thumb = false;
} else {
target_cpu->env.regs[0] = context_id;
target_cpu->env.thumb = entry & 1;
entry &= 0xfffffffe;
}
/* Start the new CPU at the requested address */
cpu_set_pc(target_cpu_state, entry);
qemu_cpu_kick(target_cpu_state);
async_run_on_cpu(target_cpu_state, arm_set_cpu_on_async_work,
RUN_ON_CPU_HOST_PTR(info));
/* We are good to go */
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
/*
 * Async work queued by arm_set_cpu_off(): record the CPU as PSCI_OFF,
 * set its halted flag and set its exception_index to EXCP_HLT.
 * The BQL is asserted to be held.
 */
static void arm_set_cpu_off_async_work(CPUState *target_cpu_state,
                                       run_on_cpu_data data)
{
    CPUState *cs = target_cpu_state;
    ARMCPU *armcpu = ARM_CPU(cs);

    assert(qemu_mutex_iothread_locked());

    armcpu->power_state = PSCI_OFF;
    cs->halted = 1;
    cs->exception_index = EXCP_HLT;
}
int arm_set_cpu_off(uint64_t cpuid)
{
CPUState *target_cpu_state;
ARMCPU *target_cpu;
assert(qemu_mutex_iothread_locked());
DPRINTF("cpu %" PRId64 "\n", cpuid);
/* change to the cpu we are powering off */
@ -185,27 +248,34 @@ int arm_set_cpu_off(uint64_t cpuid)
return QEMU_ARM_POWERCTL_INVALID_PARAM;
}
target_cpu = ARM_CPU(target_cpu_state);
if (target_cpu->powered_off) {
if (target_cpu->power_state == PSCI_OFF) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is already off\n",
__func__, cpuid);
return QEMU_ARM_POWERCTL_IS_OFF;
}
target_cpu->powered_off = true;
target_cpu_state->halted = 1;
target_cpu_state->exception_index = EXCP_HLT;
cpu_loop_exit(target_cpu_state);
/* notreached */
/* Queue work to run under the target vCPUs context */
async_run_on_cpu(target_cpu_state, arm_set_cpu_off_async_work,
RUN_ON_CPU_NULL);
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
/*
 * Async work queued by arm_reset_cpu(): perform the reset of
 * @target_cpu_state. @data is unused.
 */
static void arm_reset_cpu_async_work(CPUState *target_cpu_state,
                                     run_on_cpu_data data)
{
    cpu_reset(target_cpu_state);
}
int arm_reset_cpu(uint64_t cpuid)
{
CPUState *target_cpu_state;
ARMCPU *target_cpu;
assert(qemu_mutex_iothread_locked());
DPRINTF("cpu %" PRId64 "\n", cpuid);
/* change to the cpu we are resetting */
@ -214,15 +284,17 @@ int arm_reset_cpu(uint64_t cpuid)
return QEMU_ARM_POWERCTL_INVALID_PARAM;
}
target_cpu = ARM_CPU(target_cpu_state);
if (target_cpu->powered_off) {
if (target_cpu->power_state == PSCI_OFF) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is off\n",
__func__, cpuid);
return QEMU_ARM_POWERCTL_IS_OFF;
}
/* Reset the cpu */
cpu_reset(target_cpu_state);
/* Queue work to run under the target vCPUs context */
async_run_on_cpu(target_cpu_state, arm_reset_cpu_async_work,
RUN_ON_CPU_NULL);
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}

View file

@ -17,6 +17,7 @@
#define QEMU_ARM_POWERCTL_INVALID_PARAM QEMU_PSCI_RET_INVALID_PARAMS
#define QEMU_ARM_POWERCTL_ALREADY_ON QEMU_PSCI_RET_ALREADY_ON
#define QEMU_ARM_POWERCTL_IS_OFF QEMU_PSCI_RET_DENIED
#define QEMU_ARM_POWERCTL_ON_PENDING QEMU_PSCI_RET_ON_PENDING
/*
* arm_get_cpu_by_id:
@ -43,6 +44,7 @@ CPUState *arm_get_cpu_by_id(uint64_t cpuid);
* Returns: QEMU_ARM_POWERCTL_RET_SUCCESS on success.
* QEMU_ARM_POWERCTL_INVALID_PARAM if bad parameters are provided.
* QEMU_ARM_POWERCTL_ALREADY_ON if the CPU was already started.
* QEMU_ARM_POWERCTL_ON_PENDING if the CPU is still powering up
*/
int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
uint32_t target_el, bool target_aa64);

View file

@ -45,7 +45,7 @@ static bool arm_cpu_has_work(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
return !cpu->powered_off
return (cpu->power_state != PSCI_OFF)
&& cs->interrupt_request &
(CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD
| CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ
@ -132,7 +132,7 @@ static void arm_cpu_reset(CPUState *s)
env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
cpu->powered_off = cpu->start_powered_off;
cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON;
s->halted = cpu->start_powered_off;
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {

View file

@ -30,6 +30,9 @@
# define TARGET_LONG_BITS 32
#endif
/* ARM processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
#define CPUArchState struct CPUARMState
#include "qemu-common.h"
@ -526,6 +529,15 @@ typedef struct CPUARMState {
*/
typedef void ARMELChangeHook(ARMCPU *cpu, void *opaque);
/*
 * PSCI power state of a CPU.
 *
 * The numeric values are handed back to the guest unchanged as the
 * result of QEMU_PSCI_0_2_FN_AFFINITY_INFO, so they must use the
 * encoding PSCI defines for that call: 0 = ON, 1 = OFF, 2 = ON_PENDING.
 * This also matches the previous "powered_off ? 1 : 0" result.
 */
typedef enum ARMPSCIState {
    PSCI_ON = 0,
    PSCI_OFF = 1,
    PSCI_ON_PENDING = 2
} ARMPSCIState;
/**
* ARMCPU:
* @env: #CPUARMState
@ -582,8 +594,10 @@ struct ARMCPU {
/* Should CPU start in PSCI powered-off state? */
bool start_powered_off;
/* CPU currently in PSCI powered-off state */
bool powered_off;
/* Current power state, access guarded by BQL */
ARMPSCIState power_state;
/* CPU has virtualization extension */
bool has_el2;
/* CPU has security extension */

View file

@ -536,41 +536,33 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush(other_cs);
}
tlb_flush_all_cpus_synced(cs);
}
static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush(other_cs);
}
tlb_flush_all_cpus_synced(cs);
}
static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush_page(other_cs, value & TARGET_PAGE_MASK);
}
tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
}
static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush_page(other_cs, value & TARGET_PAGE_MASK);
}
tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
}
static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -578,19 +570,21 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
CPUState *cs = ENV_GET_CPU(env);
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
ARMMMUIdx_S2NS, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0) |
(1 << ARMMMUIdx_S2NS));
}
static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
ARMMMUIdx_S12NSE0, ARMMMUIdx_S2NS, -1);
}
tlb_flush_by_mmuidx_all_cpus_synced(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0) |
(1 << ARMMMUIdx_S2NS));
}
static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -611,13 +605,13 @@ static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 40);
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S2NS, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S2NS));
}
static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr;
if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@ -626,9 +620,8 @@ static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 40);
CPU_FOREACH(other_cs) {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S2NS, -1);
}
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S2NS));
}
static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -636,17 +629,15 @@ static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
CPUState *cs = ENV_GET_CPU(env);
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E2, -1);
tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E2, -1);
}
tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -655,18 +646,17 @@ static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E2, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E2));
}
static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
CPU_FOREACH(other_cs) {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E2, -1);
}
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S1E2));
}
static const ARMCPRegInfo cp_reginfo[] = {
@ -2542,8 +2532,10 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* Accesses to VTTBR may change the VMID so we must flush the TLB. */
if (raw_read(env, ri) != value) {
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
ARMMMUIdx_S2NS, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0) |
(1 << ARMMMUIdx_S2NS));
raw_write(env, ri, value);
}
}
@ -2898,29 +2890,33 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
CPUState *cs = ENV_GET_CPU(env);
if (arm_is_secure_below_el3(env)) {
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S1SE1) |
(1 << ARMMMUIdx_S1SE0));
} else {
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0));
}
}
static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *cs = ENV_GET_CPU(env);
bool sec = arm_is_secure_below_el3(env);
CPUState *other_cs;
CPU_FOREACH(other_cs) {
if (sec) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
} else {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
ARMMMUIdx_S12NSE0, -1);
}
if (sec) {
tlb_flush_by_mmuidx_all_cpus_synced(cs,
(1 << ARMMMUIdx_S1SE1) |
(1 << ARMMMUIdx_S1SE0));
} else {
tlb_flush_by_mmuidx_all_cpus_synced(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0));
}
}
@ -2935,13 +2931,19 @@ static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = CPU(cpu);
if (arm_is_secure_below_el3(env)) {
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S1SE1) |
(1 << ARMMMUIdx_S1SE0));
} else {
if (arm_feature(env, ARM_FEATURE_EL2)) {
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
ARMMMUIdx_S2NS, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0) |
(1 << ARMMMUIdx_S2NS));
} else {
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1);
tlb_flush_by_mmuidx(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0));
}
}
}
@ -2952,7 +2954,7 @@ static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E2, -1);
tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -2961,7 +2963,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E3, -1);
tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -2971,41 +2973,40 @@ static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
* stage 2 translations, whereas most other scopes only invalidate
* stage 1 translations.
*/
CPUState *cs = ENV_GET_CPU(env);
bool sec = arm_is_secure_below_el3(env);
bool has_el2 = arm_feature(env, ARM_FEATURE_EL2);
CPUState *other_cs;
CPU_FOREACH(other_cs) {
if (sec) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
} else if (has_el2) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
ARMMMUIdx_S12NSE0, ARMMMUIdx_S2NS, -1);
} else {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
ARMMMUIdx_S12NSE0, -1);
}
if (sec) {
tlb_flush_by_mmuidx_all_cpus_synced(cs,
(1 << ARMMMUIdx_S1SE1) |
(1 << ARMMMUIdx_S1SE0));
} else if (has_el2) {
tlb_flush_by_mmuidx_all_cpus_synced(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0) |
(1 << ARMMMUIdx_S2NS));
} else {
tlb_flush_by_mmuidx_all_cpus_synced(cs,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0));
}
}
static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E2, -1);
}
tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
CPU_FOREACH(other_cs) {
tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E3, -1);
}
tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -3021,11 +3022,13 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t pageaddr = sextract64(value << 12, 0, 56);
if (arm_is_secure_below_el3(env)) {
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1SE1,
ARMMMUIdx_S1SE0, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr,
(1 << ARMMMUIdx_S1SE1) |
(1 << ARMMMUIdx_S1SE0));
} else {
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S12NSE1,
ARMMMUIdx_S12NSE0, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0));
}
}
@ -3040,7 +3043,7 @@ static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = CPU(cpu);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E2, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -3054,47 +3057,46 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = CPU(cpu);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E3, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
bool sec = arm_is_secure_below_el3(env);
CPUState *other_cs;
uint64_t pageaddr = sextract64(value << 12, 0, 56);
CPU_FOREACH(other_cs) {
if (sec) {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1SE1,
ARMMMUIdx_S1SE0, -1);
} else {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S12NSE1,
ARMMMUIdx_S12NSE0, -1);
}
if (sec) {
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S1SE1) |
(1 << ARMMMUIdx_S1SE0));
} else {
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S12NSE1) |
(1 << ARMMMUIdx_S12NSE0));
}
}
static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
CPU_FOREACH(other_cs) {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E2, -1);
}
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
CPU_FOREACH(other_cs) {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E3, -1);
}
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
@ -3116,13 +3118,13 @@ static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 48);
tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S2NS, -1);
tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S2NS));
}
static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
CPUState *other_cs;
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr;
if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@ -3131,9 +3133,8 @@ static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 48);
CPU_FOREACH(other_cs) {
tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S2NS, -1);
}
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
(1 << ARMMMUIdx_S2NS));
}
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
@ -6769,6 +6770,12 @@ void arm_cpu_do_interrupt(CPUState *cs)
arm_cpu_do_interrupt_aarch32(cs);
}
/* Hooks may change global state so BQL should be held, also the
* BQL needs to be held for any modification of
* cs->interrupt_request.
*/
g_assert(qemu_mutex_iothread_locked());
arm_call_el_change_hook(cpu);
if (!kvm_enabled()) {

View file

@ -488,8 +488,8 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
{
if (cap_has_mp_state) {
struct kvm_mp_state mp_state = {
.mp_state =
cpu->powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
.mp_state = (cpu->power_state == PSCI_OFF) ?
KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
};
int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
if (ret) {
@ -515,7 +515,8 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
__func__, ret, strerror(-ret));
abort();
}
cpu->powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED);
cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
PSCI_OFF : PSCI_ON;
}
return 0;

View file

@ -211,6 +211,38 @@ static const VMStateInfo vmstate_cpsr = {
.put = put_cpsr,
};
/*
 * VMState "get" hook: read one byte from @f and translate it into the
 * power_state of the ARMCPU passed as @opaque. A non-zero byte means
 * the CPU was powered off; zero means it was on. Always returns 0.
 */
static int get_power(QEMUFile *f, void *opaque, size_t size,
                     VMStateField *field)
{
    ARMCPU *armcpu = opaque;

    if (qemu_get_byte(f)) {
        armcpu->power_state = PSCI_OFF;
    } else {
        armcpu->power_state = PSCI_ON;
    }
    return 0;
}
/*
 * VMState "put" hook: write the power state of the ARMCPU passed as
 * @opaque to @f as a single byte (1 when powered off, 0 when on).
 *
 * Returns 0 on success, or 1 without writing anything when the CPU is
 * in neither PSCI_ON nor PSCI_OFF; migration should never happen while
 * a power state transition is in flight.
 */
static int put_power(QEMUFile *f, void *opaque, size_t size,
                     VMStateField *field, QJSON *vmdesc)
{
    ARMCPU *armcpu = opaque;

    if (armcpu->power_state != PSCI_ON &&
        armcpu->power_state != PSCI_OFF) {
        return 1;
    }

    qemu_put_byte(f, armcpu->power_state == PSCI_OFF);
    return 0;
}
/*
* Custom VMStateInfo for the CPU power state: get_power() and put_power()
* move it through the migration stream as a single "powered off" byte.
*/
static const VMStateInfo vmstate_powered_off = {
.name = "powered_off",
.get = get_power,
.put = put_power,
};
static void cpu_pre_save(void *opaque)
{
ARMCPU *cpu = opaque;
@ -329,7 +361,14 @@ const VMStateDescription vmstate_arm_cpu = {
VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU),
VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU),
VMSTATE_BOOL(powered_off, ARMCPU),
{
.name = "power_state",
.version_id = 0,
.size = sizeof(bool),
.info = &vmstate_powered_off,
.flags = VMS_SINGLE,
.offset = 0,
},
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {

View file

@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "internals.h"
@ -435,6 +436,13 @@ void HELPER(yield)(CPUARMState *env)
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
/* When running in MTTCG we don't generate jumps to the yield and
* WFE helpers as it won't affect the scheduling of other vCPUs.
* If we wanted to more completely model WFE/SEV so we don't busy
* spin unnecessarily we would need to do something more involved.
*/
g_assert(!parallel_cpus);
/* This is a non-trappable hint instruction that generally indicates
* that the guest is currently busy-looping. Yield control back to the
* top level loop so that a more deserving VCPU has a chance to run.
@ -487,7 +495,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
*/
env->regs[15] &= (env->thumb ? ~1 : ~3);
qemu_mutex_lock_iothread();
arm_call_el_change_hook(arm_env_get_cpu(env));
qemu_mutex_unlock_iothread();
}
/* Access to user mode registers from privileged modes. */
@ -735,28 +745,58 @@ void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
{
const ARMCPRegInfo *ri = rip;
ri->writefn(env, ri, value);
if (ri->type & ARM_CP_IO) {
qemu_mutex_lock_iothread();
ri->writefn(env, ri, value);
qemu_mutex_unlock_iothread();
} else {
ri->writefn(env, ri, value);
}
}
/*
 * Read a 32-bit coprocessor register through its readfn hook.
 *
 * @env: CPU state handed through to the hook
 * @rip: opaque pointer to the register's ARMCPRegInfo
 *
 * Registers flagged ARM_CP_IO have their hook called with the iothread
 * mutex held; every other register is read without taking it.
 *
 * Returns the value produced by readfn.
 */
uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip)
{
    const ARMCPRegInfo *ri = rip;
    uint32_t res;

    if (ri->type & ARM_CP_IO) {
        /* Hold the lock only for the duration of the hook itself. */
        qemu_mutex_lock_iothread();
        res = ri->readfn(env, ri);
        qemu_mutex_unlock_iothread();
    } else {
        res = ri->readfn(env, ri);
    }

    return res;
}
/*
 * Write a 64-bit coprocessor register through its writefn hook.
 *
 * @env:   CPU state handed through to the hook
 * @rip:   opaque pointer to the register's ARMCPRegInfo
 * @value: value to write
 *
 * The hook is called exactly once.  Registers flagged ARM_CP_IO have it
 * called with the iothread mutex held; every other register is written
 * without taking it.
 */
void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value)
{
    const ARMCPRegInfo *ri = rip;

    if (ri->type & ARM_CP_IO) {
        /* Hold the lock only for the duration of the hook itself. */
        qemu_mutex_lock_iothread();
        ri->writefn(env, ri, value);
        qemu_mutex_unlock_iothread();
    } else {
        ri->writefn(env, ri, value);
    }
}
/*
 * Read a 64-bit coprocessor register through its readfn hook.
 *
 * @env: CPU state handed through to the hook
 * @rip: opaque pointer to the register's ARMCPRegInfo
 *
 * Registers flagged ARM_CP_IO have their hook called with the iothread
 * mutex held; every other register is read without taking it.
 *
 * Returns the value produced by readfn.
 */
uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
{
    const ARMCPRegInfo *ri = rip;
    uint64_t res;

    if (ri->type & ARM_CP_IO) {
        /* Hold the lock only for the duration of the hook itself. */
        qemu_mutex_lock_iothread();
        res = ri->readfn(env, ri);
        qemu_mutex_unlock_iothread();
    } else {
        res = ri->readfn(env, ri);
    }

    return res;
}
void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
@ -989,7 +1029,9 @@ void HELPER(exception_return)(CPUARMState *env)
cur_el, new_el, env->pc);
}
qemu_mutex_lock_iothread();
arm_call_el_change_hook(arm_env_get_cpu(env));
qemu_mutex_unlock_iothread();
return;

View file

@ -127,7 +127,9 @@ void arm_handle_psci_call(ARMCPU *cpu)
break;
}
target_cpu = ARM_CPU(target_cpu_state);
ret = target_cpu->powered_off ? 1 : 0;
g_assert(qemu_mutex_iothread_locked());
ret = target_cpu->power_state;
break;
default:
/* Everything above affinity level 0 is always on. */

View file

@ -1328,10 +1328,14 @@ static void handle_hint(DisasContext *s, uint32_t insn,
s->is_jmp = DISAS_WFI;
return;
case 1: /* YIELD */
s->is_jmp = DISAS_YIELD;
if (!parallel_cpus) {
s->is_jmp = DISAS_YIELD;
}
return;
case 2: /* WFE */
s->is_jmp = DISAS_WFE;
if (!parallel_cpus) {
s->is_jmp = DISAS_WFE;
}
return;
case 4: /* SEV */
case 5: /* SEVL */

View file

@ -4404,20 +4404,32 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
gen_rfe(s, pc, load_cpu_field(spsr));
}
/*
* For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
* only call the helper when running single threaded TCG code to ensure
* the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
* just skip this instruction. Currently the SEV/SEVL instructions
* which are *one* of many ways to wake the CPU from WFE are not
* implemented so we can't sleep like WFI does.
*/
static void gen_nop_hint(DisasContext *s, int val)
{
switch (val) {
case 1: /* yield */
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_YIELD;
if (!parallel_cpus) {
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_YIELD;
}
break;
case 3: /* wfi */
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_WFI;
break;
case 2: /* wfe */
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_WFE;
if (!parallel_cpus) {
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_WFE;
}
break;
case 4: /* sev */
case 5: /* sevl */

View file

@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "exec/log.h"
@ -42,11 +43,14 @@ void helper_rsm(CPUX86State *env)
#define SMM_REVISION_ID 0x00020000
#endif
/* Called with iothread lock taken */
void cpu_smm_update(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
bool smm_enabled = (env->hflags & HF_SMM_MASK);
g_assert(qemu_mutex_iothread_locked());
if (cpu->smram) {
memory_region_set_enabled(cpu->smram, smm_enabled);
}
@ -333,7 +337,10 @@ void helper_rsm(CPUX86State *env)
}
env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
env->hflags &= ~HF_SMM_MASK;
qemu_mutex_lock_iothread();
cpu_smm_update(cpu);
qemu_mutex_unlock_iothread();
qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP);

View file

@ -25,6 +25,7 @@
#include "exec/helper-proto.h"
#include "sysemu/kvm.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "exec/address-spaces.h"
#ifdef CONFIG_KVM
#include <linux/kvm.h>
@ -109,11 +110,13 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen)
/*
 * SCLP service call.
 *
 * Runs sclp_service_call() with the iothread mutex held.  A negative
 * result is turned into a program interrupt (code -r, instruction
 * length 4) and the helper then reports 0; otherwise the result of the
 * service call is returned unchanged.
 *
 * There is a single exit path, so the return statement is only reached
 * after the mutex has been released.
 */
uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    qemu_mutex_lock_iothread();
    int r = sclp_service_call(env, r1, r2);
    if (r < 0) {
        program_interrupt(env, -r, 4);
        r = 0;
    }
    qemu_mutex_unlock_iothread();
    return r;
}

View file

@ -1768,13 +1768,15 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
case 1:
env->dmmu.mmu_primary_context = val;
env->immu.mmu_primary_context = val;
tlb_flush_by_mmuidx(CPU(cpu), MMU_USER_IDX, MMU_KERNEL_IDX, -1);
tlb_flush_by_mmuidx(CPU(cpu),
(1 << MMU_USER_IDX) | (1 << MMU_KERNEL_IDX));
break;
case 2:
env->dmmu.mmu_secondary_context = val;
env->immu.mmu_secondary_context = val;
tlb_flush_by_mmuidx(CPU(cpu), MMU_USER_SECONDARY_IDX,
MMU_KERNEL_SECONDARY_IDX, -1);
tlb_flush_by_mmuidx(CPU(cpu),
(1 << MMU_USER_SECONDARY_IDX) |
(1 << MMU_KERNEL_SECONDARY_IDX));
break;
default:
cpu_unassigned_access(cs, addr, true, false, 1, size);

View file

@ -165,4 +165,15 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
}
/* This defines the natural memory order supported by this
* architecture before guarantees made by various barrier
* instructions.
*
* The x86 has a pretty strong memory ordering which only really
* allows for some stores to be re-ordered after loads.
*/
#include "tcg-mo.h"
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
#endif

48
tcg/tcg-mo.h Normal file
View file

@ -0,0 +1,48 @@
/*
* Tiny Code Generator for QEMU
*
* Copyright (c) 2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef TCG_MO_H
#define TCG_MO_H
/*
 * Memory-barrier descriptor bits.
 *
 * TCG_MO_<A>_<B> selects the type of accesses on which ordering is to be
 * ensured, where A is before B in program order (modeled after SPARC
 * barriers).
 *
 * TCG_BAR_* selects the kind of ordering the instruction ensures.  These
 * are derived from x86/aarch64 instructions and are different from C11
 * semantics.
 */
typedef enum {
    TCG_MO_LD_LD = 1 << 0,
    TCG_MO_ST_LD = 1 << 1,
    TCG_MO_LD_ST = 1 << 2,
    TCG_MO_ST_ST = 1 << 3,
    /* Every access-pair bit set. */
    TCG_MO_ALL   = TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_LD_ST | TCG_MO_ST_ST,

    TCG_BAR_LDAQ = 1 << 4,  /* Following ops will not come forward */
    TCG_BAR_STRL = 1 << 5,  /* Previous ops will not be delayed */
    /* No ops cross barrier. */
    TCG_BAR_SC   = TCG_BAR_LDAQ | TCG_BAR_STRL,
} TCGBar;
#endif /* TCG_MO_H */

View file

@ -29,6 +29,7 @@
#include "cpu.h"
#include "exec/tb-context.h"
#include "qemu/bitops.h"
#include "tcg-mo.h"
#include "tcg-target.h"
/* XXX: make safe guess about sizes */
@ -79,6 +80,15 @@ typedef uint64_t tcg_target_ulong;
#error unsupported
#endif
/* Oversized TCG guests make things like MTTCG hard
* as we can't use atomics for cputlb updates.
*/
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
#define TCG_OVERSIZED_GUEST 1
#else
#define TCG_OVERSIZED_GUEST 0
#endif
#if TCG_TARGET_NB_REGS <= 32
typedef uint32_t TCGRegSet;
#elif TCG_TARGET_NB_REGS <= 64
@ -498,23 +508,6 @@ static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t)
#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
typedef enum {
/* Used to indicate the type of accesses on which ordering
is to be ensured. Modeled after SPARC barriers. */
TCG_MO_LD_LD = 0x01,
TCG_MO_ST_LD = 0x02,
TCG_MO_LD_ST = 0x04,
TCG_MO_ST_ST = 0x08,
TCG_MO_ALL = 0x0F, /* OR of the above */
/* Used to indicate the kind of ordering which is to be ensured by the
instruction. These types are derived from x86/aarch64 instructions.
It should be noted that these are different from C11 semantics. */
TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */
TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */
TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */
} TCGBar;
/* Conditions. Note that these are laid out for easy manipulation by
the functions below:
bit 0 is used for inverting;

View file

@ -55,11 +55,11 @@
#include "translate-all.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "exec/log.h"
/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
/* #define DEBUG_LOCKING */
/* make various TB consistency checks */
/* #define DEBUG_TB_CHECK */
@ -74,20 +74,10 @@
* access to the memory related structures are protected with the
* mmap_lock.
*/
#ifdef DEBUG_LOCKING
#define DEBUG_MEM_LOCKS 1
#else
#define DEBUG_MEM_LOCKS 0
#endif
#ifdef CONFIG_SOFTMMU
#define assert_memory_lock() do { /* nothing */ } while (0)
#define assert_memory_lock() tcg_debug_assert(have_tb_lock)
#else
#define assert_memory_lock() do { \
if (DEBUG_MEM_LOCKS) { \
g_assert(have_mmap_lock()); \
} \
} while (0)
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#endif
#define SMC_BITMAP_USE_THRESHOLD 10
@ -145,9 +135,7 @@ TCGContext tcg_ctx;
bool parallel_cpus;
/* translation block context */
#ifdef CONFIG_USER_ONLY
__thread int have_tb_lock;
#endif
static void page_table_config_init(void)
{
@ -169,51 +157,31 @@ static void page_table_config_init(void)
assert(v_l2_levels >= 0);
}
#define assert_tb_locked() tcg_debug_assert(have_tb_lock)
#define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock)
/* Acquire tcg_ctx.tb_ctx.tb_lock and record the acquisition in the
 * per-thread have_tb_lock counter. The lock must not already be held by
 * the calling thread (asserted before locking).
 */
void tb_lock(void)
{
#ifdef CONFIG_USER_ONLY
assert(!have_tb_lock);
assert_tb_unlocked();
qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock++;
#endif
}
/* Release tcg_ctx.tb_ctx.tb_lock. The calling thread must currently hold
 * it (asserted); have_tb_lock is decremented before the mutex is unlocked.
 */
void tb_unlock(void)
{
#ifdef CONFIG_USER_ONLY
assert(have_tb_lock);
assert_tb_locked();
have_tb_lock--;
qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
#endif
}
/* Drop tcg_ctx.tb_ctx.tb_lock if have_tb_lock says this thread holds it,
 * and reset have_tb_lock to 0. Does nothing when the lock is not held.
 */
void tb_lock_reset(void)
{
#ifdef CONFIG_USER_ONLY
if (have_tb_lock) {
qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock = 0;
}
#endif
}
#ifdef DEBUG_LOCKING
#define DEBUG_TB_LOCKS 1
#else
#define DEBUG_TB_LOCKS 0
#endif
#ifdef CONFIG_SOFTMMU
#define assert_tb_lock() do { /* nothing */ } while (0)
#else
#define assert_tb_lock() do { \
if (DEBUG_TB_LOCKS) { \
g_assert(have_tb_lock); \
} \
} while (0)
#endif
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
void cpu_gen_init(void)
@ -847,7 +815,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
{
TranslationBlock *tb;
assert_tb_lock();
assert_tb_locked();
if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
return NULL;
@ -862,7 +830,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
/* Called with tb_lock held. */
void tb_free(TranslationBlock *tb)
{
assert_tb_lock();
assert_tb_locked();
/* In practice this is mostly used for single use temporary TB
Ignore the hard cases and just back up if this TB happens to
@ -1104,7 +1072,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
uint32_t h;
tb_page_addr_t phys_pc;
assert_tb_lock();
assert_tb_locked();
atomic_set(&tb->invalid, true);
@ -1421,7 +1389,7 @@ static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end)
#ifdef CONFIG_SOFTMMU
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
{
assert_tb_lock();
assert_tb_locked();
tb_invalidate_phys_range_1(start, end);
}
#else
@ -1464,7 +1432,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
#endif /* TARGET_HAS_PRECISE_SMC */
assert_memory_lock();
assert_tb_lock();
assert_tb_locked();
p = page_find(start >> TARGET_PAGE_BITS);
if (!p) {
@ -1543,7 +1511,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
#ifdef CONFIG_SOFTMMU
/* len must be <= 8 and start must be a multiple of len.
* Called via softmmu_template.h when code areas are written to with
* tb_lock held.
* iothread mutex not held.
*/
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
@ -1745,7 +1713,10 @@ void tb_check_watchpoint(CPUState *cpu)
#ifndef CONFIG_USER_ONLY
/* in deterministic execution mode, instructions doing device I/Os
must be at the end of the TB */
* must be at the end of the TB.
*
* Called by softmmu_template.h, with iothread mutex not held.
*/
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
#if defined(TARGET_MIPS) || defined(TARGET_SH4)
@ -1957,6 +1928,7 @@ void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
/* Raise the interrupt bits in @mask on @cpu.
 *
 * ORs @mask into cpu->interrupt_request and then sets cpu->tcg_exit_req.
 * The caller must hold the iothread mutex; this is asserted on entry.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
g_assert(qemu_mutex_iothread_locked());
cpu->interrupt_request |= mask;
/* NOTE(review): presumably asks the running translated code to exit so
 * the pending request is noticed - confirm against the exec loop.
 */
cpu->tcg_exit_req = 1;
}

View file

@ -21,6 +21,7 @@
#include "qemu-common.h"
#include "qom/cpu.h"
#include "sysemu/cpus.h"
#include "qemu/main-loop.h"
uintptr_t qemu_real_host_page_size;
intptr_t qemu_real_host_page_mask;
@ -30,6 +31,7 @@ intptr_t qemu_real_host_page_mask;
static void tcg_handle_interrupt(CPUState *cpu, int mask)
{
int old_mask;
g_assert(qemu_mutex_iothread_locked());
old_mask = cpu->interrupt_request;
cpu->interrupt_request |= mask;
@ -40,17 +42,16 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
*/
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
return;
}
if (use_icount) {
cpu->icount_decr.u16.high = 0xffff;
if (!cpu->can_do_io
&& (mask & ~old_mask) != 0) {
cpu_abort(cpu, "Raised interrupt while not in I/O function");
}
} else {
cpu->tcg_exit_req = 1;
if (use_icount) {
cpu->icount_decr.u16.high = 0xffff;
if (!cpu->can_do_io
&& (mask & ~old_mask) != 0) {
cpu_abort(cpu, "Raised interrupt while not in I/O function");
}
} else {
cpu->tcg_exit_req = 1;
}
}
}

49
vl.c
View file

@ -300,6 +300,26 @@ static QemuOptsList qemu_machine_opts = {
},
};
/* Option list registered under the name "accel".
 *
 * Two string-valued keys are recognised:
 *   accel  - selects the type of accelerator; it is also the implied key,
 *            so a bare value is taken as the accelerator name
 *   thread - enables/disables multi-threaded TCG
 */
static QemuOptsList qemu_accel_opts = {
.name = "accel",
.implied_opt_name = "accel",
.head = QTAILQ_HEAD_INITIALIZER(qemu_accel_opts.head),
/* NOTE(review): presumably folds repeated -accel options into a single
 * QemuOpts instance - confirm against the QemuOptsList documentation.
 */
.merge_lists = true,
.desc = {
{
.name = "accel",
.type = QEMU_OPT_STRING,
.help = "Select the type of accelerator",
},
{
.name = "thread",
.type = QEMU_OPT_STRING,
.help = "Enable/disable multi-threaded TCG",
},
{ /* end of list */ }
},
};
static QemuOptsList qemu_boot_opts = {
.name = "boot-opts",
.implied_opt_name = "order",
@ -2928,7 +2948,8 @@ int main(int argc, char **argv, char **envp)
const char *boot_once = NULL;
DisplayState *ds;
int cyls, heads, secs, translation;
QemuOpts *hda_opts = NULL, *opts, *machine_opts, *icount_opts = NULL;
QemuOpts *opts, *machine_opts;
QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
QemuOptsList *olist;
int optind;
const char *optarg;
@ -2983,6 +3004,7 @@ int main(int argc, char **argv, char **envp)
qemu_add_opts(&qemu_trace_opts);
qemu_add_opts(&qemu_option_rom_opts);
qemu_add_opts(&qemu_machine_opts);
qemu_add_opts(&qemu_accel_opts);
qemu_add_opts(&qemu_mem_opts);
qemu_add_opts(&qemu_smp_opts);
qemu_add_opts(&qemu_boot_opts);
@ -3675,6 +3697,26 @@ int main(int argc, char **argv, char **envp)
qdev_prop_register_global(&kvm_pit_lost_tick_policy);
break;
}
case QEMU_OPTION_accel:
accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
optarg, true);
optarg = qemu_opt_get(accel_opts, "accel");
olist = qemu_find_opts("machine");
if (strcmp("kvm", optarg) == 0) {
qemu_opts_parse_noisily(olist, "accel=kvm", false);
} else if (strcmp("xen", optarg) == 0) {
qemu_opts_parse_noisily(olist, "accel=xen", false);
} else if (strcmp("tcg", optarg) == 0) {
qemu_opts_parse_noisily(olist, "accel=tcg", false);
} else {
if (!is_help_option(optarg)) {
error_printf("Unknown accelerator: %s", optarg);
}
error_printf("Supported accelerators: kvm, xen, tcg\n");
exit(1);
}
break;
case QEMU_OPTION_usb:
olist = qemu_find_opts("machine");
qemu_opts_parse_noisily(olist, "usb=on", false);
@ -3983,6 +4025,8 @@ int main(int argc, char **argv, char **envp)
replay_configure(icount_opts);
qemu_tcg_configure(accel_opts, &error_fatal);
machine_class = select_machine();
set_memory_options(&ram_slots, &maxram_size, machine_class);
@ -4349,6 +4393,9 @@ int main(int argc, char **argv, char **envp)
if (!tcg_enabled()) {
error_report("-icount is not allowed with hardware virtualization");
exit(1);
} else if (qemu_tcg_mttcg_enabled()) {
error_report("-icount does not currently work with MTTCG");
exit(1);
}
configure_icount(icount_opts, &error_abort);
qemu_opts_del(icount_opts);