Clean up code_gen_buffer allocation.
Add tcg_remove_ops_after.
Fix tcg_constant_* documentation.
Improve TB chaining documentation.
Fix float32_exp2.
Fix arm tcg_out_op function signature.

-----BEGIN PGP SIGNATURE-----

iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmDGrQMdHHJpY2hhcmQu
aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9kZgf+LSKbkimQKNGJNpFp
xdMG2B0681tHyO7aiDHZqZf9Izeey7x9vGZRZzPfxomdN8qYT2PiklNx2yIIxNRt
WdJ3e7+l7cYjAGY6HdrTqJ6ZiqOOftMzMuHWiXiHD0rMYTIjXgAfsf+H+lVRwMCR
BbQBB1ttUJzDSLkM5B2rNuWEjup4shAMgijkipkqkaWrzZIAvfIkcfScZyqWFguG
GoWnQxIHq7XMbveUX1Tu1JcdVZlXmuMl0LMQ8Qj5sbep1gjyYixuV6lWupp6SPX9
quRBsyVSmAa4frK67huJ4WVq4gZ2VylNvpiwjwoChYgJ8TOU73n7KGZOAl6i0iq2
ytR6Pw==
=ft63
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210613' into staging

Clean up code_gen_buffer allocation.
Add tcg_remove_ops_after.
Fix tcg_constant_* documentation.
Improve TB chaining documentation.
Fix float32_exp2.
Fix arm tcg_out_op function signature.

# gpg: Signature made Mon 14 Jun 2021 02:12:35 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210613: (34 commits)
  docs/devel: Explain in more detail the TB chaining mechanisms
  softfloat: Fix tp init in float32_exp2
  tcg/arm: Fix tcg_out_op function signature
  tcg: Fix documentation for tcg_constant_* vs tcg_temp_free_*
  tcg: Introduce tcg_remove_ops_after
  tcg: Move tcg_init_ctx and tcg_ctx from accel/tcg/
  tcg: When allocating for !splitwx, begin with PROT_NONE
  tcg: Merge buffer protection and guard page protection
  tcg: Round the tb_size default from qemu_get_host_physmem
  util/osdep: Add qemu_mprotect_rw
  tcg: Sink qemu_madvise call to common code
  tcg: Return the map protection from alloc_code_gen_buffer
  tcg: Allocate code_gen_buffer into struct tcg_region_state
  tcg: Move in_code_gen_buffer and tests to region.c
  tcg: Tidy split_cross_256mb
  tcg: Tidy tcg_n_regions
  tcg: Rename region.start to region.after_prologue
  tcg: Replace region.end with region.total_size
  tcg: Move MAX_CODE_GEN_BUFFER_SIZE to tcg-target.h
  tcg: Introduce tcg_max_ctxs
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit fbe7919ece
27 changed files with 1266 additions and 1090 deletions
@@ -16,5 +16,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
                               int cflags);
 
 void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
+void page_init(void);
+void tb_htable_init(void);
 
 #endif /* ACCEL_TCG_INTERNAL_H */
@@ -32,6 +32,11 @@
 #include "qemu/error-report.h"
 #include "qemu/accel.h"
 #include "qapi/qapi-builtin-visit.h"
+#include "qemu/units.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/boards.h"
+#endif
+#include "internal.h"
 
 struct TCGState {
     AccelState parent_obj;
@@ -105,22 +110,29 @@ static void tcg_accel_instance_init(Object *obj)
 
 bool mttcg_enabled;
 
-static int tcg_init(MachineState *ms)
+static int tcg_init_machine(MachineState *ms)
 {
     TCGState *s = TCG_STATE(current_accel());
+#ifdef CONFIG_USER_ONLY
+    unsigned max_cpus = 1;
+#else
+    unsigned max_cpus = ms->smp.max_cpus;
+#endif
 
-    tcg_exec_init(s->tb_size * 1024 * 1024, s->splitwx_enabled);
+    tcg_allowed = true;
     mttcg_enabled = s->mttcg_enabled;
 
+    page_init();
+    tb_htable_init();
+    tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus);
+
+#if defined(CONFIG_SOFTMMU)
     /*
-     * Initialize TCG regions only for softmmu.
-     *
-     * This needs to be done later for user mode, because the prologue
-     * generation needs to be delayed so that GUEST_BASE is already set.
+     * There's no guest base to take into account, so go ahead and
+     * initialize the prologue now.
      */
-#ifndef CONFIG_USER_ONLY
-    tcg_region_init();
-#endif /* !CONFIG_USER_ONLY */
+    tcg_prologue_init(tcg_ctx);
+#endif
 
     return 0;
 }
@@ -200,7 +212,7 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data)
 {
     AccelClass *ac = ACCEL_CLASS(oc);
     ac->name = "tcg";
-    ac->init_machine = tcg_init;
+    ac->init_machine = tcg_init_machine;
     ac->allowed = &tcg_allowed;
 
     object_class_property_add_str(oc, "thread",
@@ -18,7 +18,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/units.h"
 #include "qemu-common.h"
 
 #define NO_CPU_IO_DEFS
@@ -49,7 +48,6 @@
 #include "exec/cputlb.h"
 #include "exec/translate-all.h"
 #include "qemu/bitmap.h"
-#include "qemu/error-report.h"
 #include "qemu/qemu-print.h"
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
@@ -220,9 +218,6 @@ static int v_l2_levels;
 
 static void *l1_map[V_L1_MAX_SIZE];
 
-/* code generation context */
-TCGContext tcg_init_ctx;
-__thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
 
 static void page_table_config_init(void)
@@ -245,11 +240,6 @@ static void page_table_config_init(void)
     assert(v_l2_levels >= 0);
 }
 
-static void cpu_gen_init(void)
-{
-    tcg_context_init(&tcg_init_ctx);
-}
-
 /* Encode VAL as a signed leb128 sequence at P.
    Return P incremented past the encoded value. */
 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
@@ -415,7 +405,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
     return false;
 }
 
-static void page_init(void)
+void page_init(void)
 {
     page_size_init();
     page_table_config_init();
@ -900,408 +890,6 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
|
|||
}
|
||||
}
|
||||
|
||||
/* Minimum size of the code gen buffer. This number is randomly chosen,
|
||||
but not so small that we can't have a fair number of TB's live. */
|
||||
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
|
||||
|
||||
/* Maximum size of the code gen buffer we'd like to use. Unless otherwise
|
||||
indicated, this is constrained by the range of direct branches on the
|
||||
host cpu, as used by the TCG implementation of goto_tb. */
|
||||
#if defined(__x86_64__)
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||
#elif defined(__sparc__)
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||
#elif defined(__powerpc64__)
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||
#elif defined(__powerpc__)
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
|
||||
#elif defined(__aarch64__)
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||
#elif defined(__s390x__)
|
||||
/* We have a +- 4GB range on the branches; leave some slop. */
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
|
||||
#elif defined(__mips__)
|
||||
/* We have a 256MB branch region, but leave room to make sure the
|
||||
main executable is also within that region. */
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB)
|
||||
#else
|
||||
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
|
||||
#endif
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 32
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
/*
|
||||
* For user mode on smaller 32 bit systems we may run into trouble
|
||||
* allocating big chunks of data in the right place. On these systems
|
||||
* we utilise a static code generation buffer directly in the binary.
|
||||
*/
|
||||
#define USE_STATIC_CODE_GEN_BUFFER
|
||||
#endif
|
||||
#else /* TCG_TARGET_REG_BITS == 64 */
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
/*
|
||||
* As user-mode emulation typically means running multiple instances
|
||||
* of the translator don't go too nuts with our default code gen
|
||||
* buffer lest we make things too hard for the OS.
|
||||
*/
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
|
||||
#else
|
||||
/*
|
||||
* We expect most system emulation to run one or two guests per host.
|
||||
* Users running large scale system emulation may want to tweak their
|
||||
* runtime setup via the tb-size control on the command line.
|
||||
*/
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
|
||||
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
|
||||
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
|
||||
|
||||
static size_t size_code_gen_buffer(size_t tb_size)
|
||||
{
|
||||
/* Size the buffer. */
|
||||
if (tb_size == 0) {
|
||||
size_t phys_mem = qemu_get_host_physmem();
|
||||
if (phys_mem == 0) {
|
||||
tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
|
||||
} else {
|
||||
tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
|
||||
}
|
||||
}
|
||||
if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
|
||||
tb_size = MIN_CODE_GEN_BUFFER_SIZE;
|
||||
}
|
||||
if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
|
||||
tb_size = MAX_CODE_GEN_BUFFER_SIZE;
|
||||
}
|
||||
return tb_size;
|
||||
}
|
||||
|
||||
#ifdef __mips__
|
||||
/* In order to use J and JAL within the code_gen_buffer, we require
|
||||
that the buffer not cross a 256MB boundary. */
|
||||
static inline bool cross_256mb(void *addr, size_t size)
|
||||
{
|
||||
return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
|
||||
}
|
||||
|
||||
/* We weren't able to allocate a buffer without crossing that boundary,
|
||||
so make do with the larger portion of the buffer that doesn't cross.
|
||||
Returns the new base of the buffer, and adjusts code_gen_buffer_size. */
|
||||
static inline void *split_cross_256mb(void *buf1, size_t size1)
|
||||
{
|
||||
void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
|
||||
size_t size2 = buf1 + size1 - buf2;
|
||||
|
||||
size1 = buf2 - buf1;
|
||||
if (size1 < size2) {
|
||||
size1 = size2;
|
||||
buf1 = buf2;
|
||||
}
|
||||
|
||||
tcg_ctx->code_gen_buffer_size = size1;
|
||||
return buf1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_STATIC_CODE_GEN_BUFFER
|
||||
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
|
||||
__attribute__((aligned(CODE_GEN_ALIGN)));
|
||||
|
||||
static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
|
||||
{
|
||||
void *buf, *end;
|
||||
size_t size;
|
||||
|
||||
if (splitwx > 0) {
|
||||
error_setg(errp, "jit split-wx not supported");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* page-align the beginning and end of the buffer */
|
||||
buf = static_code_gen_buffer;
|
||||
end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
|
||||
buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
|
||||
end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
|
||||
|
||||
size = end - buf;
|
||||
|
||||
/* Honor a command-line option limiting the size of the buffer. */
|
||||
if (size > tb_size) {
|
||||
size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
|
||||
}
|
||||
tcg_ctx->code_gen_buffer_size = size;
|
||||
|
||||
#ifdef __mips__
|
||||
if (cross_256mb(buf, size)) {
|
||||
buf = split_cross_256mb(buf, size);
|
||||
size = tcg_ctx->code_gen_buffer_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (qemu_mprotect_rwx(buf, size)) {
|
||||
error_setg_errno(errp, errno, "mprotect of jit buffer");
|
||||
return false;
|
||||
}
|
||||
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
|
||||
|
||||
tcg_ctx->code_gen_buffer = buf;
|
||||
return true;
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
|
||||
{
|
||||
void *buf;
|
||||
|
||||
if (splitwx > 0) {
|
||||
error_setg(errp, "jit split-wx not supported");
|
||||
return false;
|
||||
}
|
||||
|
||||
buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
|
||||
PAGE_EXECUTE_READWRITE);
|
||||
if (buf == NULL) {
|
||||
error_setg_win32(errp, GetLastError(),
|
||||
"allocate %zu bytes for jit buffer", size);
|
||||
return false;
|
||||
}
|
||||
|
||||
tcg_ctx->code_gen_buffer = buf;
|
||||
tcg_ctx->code_gen_buffer_size = size;
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static bool alloc_code_gen_buffer_anon(size_t size, int prot,
|
||||
int flags, Error **errp)
|
||||
{
|
||||
void *buf;
|
||||
|
||||
buf = mmap(NULL, size, prot, flags, -1, 0);
|
||||
if (buf == MAP_FAILED) {
|
||||
error_setg_errno(errp, errno,
|
||||
"allocate %zu bytes for jit buffer", size);
|
||||
return false;
|
||||
}
|
||||
tcg_ctx->code_gen_buffer_size = size;
|
||||
|
||||
#ifdef __mips__
|
||||
if (cross_256mb(buf, size)) {
|
||||
/*
|
||||
* Try again, with the original still mapped, to avoid re-acquiring
|
||||
* the same 256mb crossing.
|
||||
*/
|
||||
size_t size2;
|
||||
void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
|
||||
switch ((int)(buf2 != MAP_FAILED)) {
|
||||
case 1:
|
||||
if (!cross_256mb(buf2, size)) {
|
||||
/* Success! Use the new buffer. */
|
||||
munmap(buf, size);
|
||||
break;
|
||||
}
|
||||
/* Failure. Work with what we had. */
|
||||
munmap(buf2, size);
|
||||
/* fallthru */
|
||||
default:
|
||||
/* Split the original buffer. Free the smaller half. */
|
||||
buf2 = split_cross_256mb(buf, size);
|
||||
size2 = tcg_ctx->code_gen_buffer_size;
|
||||
if (buf == buf2) {
|
||||
munmap(buf + size2, size - size2);
|
||||
} else {
|
||||
munmap(buf, size - size2);
|
||||
}
|
||||
size = size2;
|
||||
break;
|
||||
}
|
||||
buf = buf2;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Request large pages for the buffer. */
|
||||
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
|
||||
|
||||
tcg_ctx->code_gen_buffer = buf;
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
#ifdef CONFIG_POSIX
|
||||
#include "qemu/memfd.h"
|
||||
|
||||
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
|
||||
{
|
||||
void *buf_rw = NULL, *buf_rx = MAP_FAILED;
|
||||
int fd = -1;
|
||||
|
||||
#ifdef __mips__
|
||||
/* Find space for the RX mapping, vs the 256MiB regions. */
|
||||
if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS |
|
||||
MAP_NORESERVE, errp)) {
|
||||
return false;
|
||||
}
|
||||
/* The size of the mapping may have been adjusted. */
|
||||
size = tcg_ctx->code_gen_buffer_size;
|
||||
buf_rx = tcg_ctx->code_gen_buffer;
|
||||
#endif
|
||||
|
||||
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
|
||||
if (buf_rw == NULL) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
#ifdef __mips__
|
||||
void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
|
||||
MAP_SHARED | MAP_FIXED, fd, 0);
|
||||
if (tmp != buf_rx) {
|
||||
goto fail_rx;
|
||||
}
|
||||
#else
|
||||
buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
|
||||
if (buf_rx == MAP_FAILED) {
|
||||
goto fail_rx;
|
||||
}
|
||||
#endif
|
||||
|
||||
close(fd);
|
||||
tcg_ctx->code_gen_buffer = buf_rw;
|
||||
tcg_ctx->code_gen_buffer_size = size;
|
||||
tcg_splitwx_diff = buf_rx - buf_rw;
|
||||
|
||||
/* Request large pages for the buffer and the splitwx. */
|
||||
qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
|
||||
qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
|
||||
return true;
|
||||
|
||||
fail_rx:
|
||||
error_setg_errno(errp, errno, "failed to map shared memory for execute");
|
||||
fail:
|
||||
if (buf_rx != MAP_FAILED) {
|
||||
munmap(buf_rx, size);
|
||||
}
|
||||
if (buf_rw) {
|
||||
munmap(buf_rw, size);
|
||||
}
|
||||
if (fd >= 0) {
|
||||
close(fd);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_POSIX */
|
||||
|
||||
#ifdef CONFIG_DARWIN
|
||||
#include <mach/mach.h>
|
||||
|
||||
extern kern_return_t mach_vm_remap(vm_map_t target_task,
|
||||
mach_vm_address_t *target_address,
|
||||
mach_vm_size_t size,
|
||||
mach_vm_offset_t mask,
|
||||
int flags,
|
||||
vm_map_t src_task,
|
||||
mach_vm_address_t src_address,
|
||||
boolean_t copy,
|
||||
vm_prot_t *cur_protection,
|
||||
vm_prot_t *max_protection,
|
||||
vm_inherit_t inheritance);
|
||||
|
||||
static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
|
||||
{
|
||||
kern_return_t ret;
|
||||
mach_vm_address_t buf_rw, buf_rx;
|
||||
vm_prot_t cur_prot, max_prot;
|
||||
|
||||
/* Map the read-write portion via normal anon memory. */
|
||||
if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
|
||||
buf_rx = 0;
|
||||
ret = mach_vm_remap(mach_task_self(),
|
||||
&buf_rx,
|
||||
size,
|
||||
0,
|
||||
VM_FLAGS_ANYWHERE,
|
||||
mach_task_self(),
|
||||
buf_rw,
|
||||
false,
|
||||
&cur_prot,
|
||||
&max_prot,
|
||||
VM_INHERIT_NONE);
|
||||
if (ret != KERN_SUCCESS) {
|
||||
/* TODO: Convert "ret" to a human readable error message. */
|
||||
error_setg(errp, "vm_remap for jit splitwx failed");
|
||||
munmap((void *)buf_rw, size);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
|
||||
error_setg_errno(errp, errno, "mprotect for jit splitwx");
|
||||
munmap((void *)buf_rx, size);
|
||||
munmap((void *)buf_rw, size);
|
||||
return false;
|
||||
}
|
||||
|
||||
tcg_splitwx_diff = buf_rx - buf_rw;
|
||||
return true;
|
||||
}
|
||||
#endif /* CONFIG_DARWIN */
|
||||
#endif /* CONFIG_TCG_INTERPRETER */
|
||||
|
||||
static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
|
||||
{
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
# ifdef CONFIG_DARWIN
|
||||
return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
|
||||
# endif
|
||||
# ifdef CONFIG_POSIX
|
||||
return alloc_code_gen_buffer_splitwx_memfd(size, errp);
|
||||
# endif
|
||||
#endif
|
||||
error_setg(errp, "jit split-wx not supported");
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
|
||||
{
|
||||
ERRP_GUARD();
|
||||
int prot, flags;
|
||||
|
||||
if (splitwx) {
|
||||
if (alloc_code_gen_buffer_splitwx(size, errp)) {
|
||||
return true;
|
||||
}
|
||||
/*
|
||||
* If splitwx force-on (1), fail;
|
||||
* if splitwx default-on (-1), fall through to splitwx off.
|
||||
*/
|
||||
if (splitwx > 0) {
|
||||
return false;
|
||||
}
|
||||
error_free_or_abort(errp);
|
||||
}
|
||||
|
||||
prot = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
#ifdef CONFIG_TCG_INTERPRETER
|
||||
/* The tcg interpreter does not need execute permission. */
|
||||
prot = PROT_READ | PROT_WRITE;
|
||||
#elif defined(CONFIG_DARWIN)
|
||||
/* Applicable to both iOS and macOS (Apple Silicon). */
|
||||
if (!splitwx) {
|
||||
flags |= MAP_JIT;
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc_code_gen_buffer_anon(size, prot, flags, errp);
|
||||
}
|
||||
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
|
||||
|
||||
static bool tb_cmp(const void *ap, const void *bp)
|
||||
{
|
||||
const TranslationBlock *a = ap;
|
||||
|
@ -1316,36 +904,13 @@ static bool tb_cmp(const void *ap, const void *bp)
|
|||
a->page_addr[1] == b->page_addr[1];
|
||||
}
|
||||
|
||||
static void tb_htable_init(void)
|
||||
void tb_htable_init(void)
|
||||
{
|
||||
unsigned int mode = QHT_MODE_AUTO_RESIZE;
|
||||
|
||||
qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
|
||||
}
|
||||
|
||||
/* Must be called before using the QEMU cpus. 'tb_size' is the size
|
||||
(in bytes) allocated to the translation buffer. Zero means default
|
||||
size. */
|
||||
void tcg_exec_init(unsigned long tb_size, int splitwx)
|
||||
{
|
||||
bool ok;
|
||||
|
||||
tcg_allowed = true;
|
||||
cpu_gen_init();
|
||||
page_init();
|
||||
tb_htable_init();
|
||||
|
||||
ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
|
||||
splitwx, &error_fatal);
|
||||
assert(ok);
|
||||
|
||||
#if defined(CONFIG_SOFTMMU)
|
||||
/* There's no guest base to take into account, so go ahead and
|
||||
initialize the prologue now. */
|
||||
tcg_prologue_init(tcg_ctx);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* call with @p->lock held */
|
||||
static inline void invalidate_page_bitmap(PageDesc *p)
|
||||
{
|
||||
|
|
|
@@ -813,7 +813,7 @@ int main(int argc, char **argv)
     envlist_free(envlist);
 
     /*
-     * Now that page sizes are configured in tcg_exec_init() we can do
+     * Now that page sizes are configured we can do
      * proper page alignment for guest_base.
      */
     guest_base = HOST_PAGE_ALIGN(guest_base);
@@ -879,7 +879,6 @@ int main(int argc, char **argv)
      * the real value of GUEST_BASE into account.
      */
     tcg_prologue_init(tcg_ctx);
-    tcg_region_init();
 
     /* build Task State */
     memset(ts, 0, sizeof(TaskState));
@@ -11,13 +11,14 @@ performances.
 QEMU's dynamic translation backend is called TCG, for "Tiny Code
 Generator". For more information, please take a look at ``tcg/README``.
 
-Some notable features of QEMU's dynamic translator are:
+The following sections outline some notable features and implementation
+details of QEMU's dynamic translator.
 
 CPU state optimisations
 -----------------------
 
-The target CPUs have many internal states which change the way it
-evaluates instructions. In order to achieve a good speed, the
+The target CPUs have many internal states which change the way they
+evaluate instructions. In order to achieve a good speed, the
 translation phase considers that some state information of the virtual
 CPU cannot change in it. The state is recorded in the Translation
 Block (TB). If the state changes (e.g. privilege level), a new TB will
@@ -31,17 +32,95 @@ Direct block chaining
 ---------------------
 
 After each translated basic block is executed, QEMU uses the simulated
-Program Counter (PC) and other cpu state information (such as the CS
+Program Counter (PC) and other CPU state information (such as the CS
 segment base value) to find the next basic block.
 
-In order to accelerate the most common cases where the new simulated PC
-is known, QEMU can patch a basic block so that it jumps directly to the
-next one.
+In its simplest, less optimized form, this is done by exiting from the
+current TB, going through the TB epilogue, and then back to the
+main loop. That's where QEMU looks for the next TB to execute,
+translating it from the guest architecture if it isn't already available
+in memory. Then QEMU proceeds to execute this next TB, starting at the
+prologue and then moving on to the translated instructions.
 
-The most portable code uses an indirect jump. An indirect jump makes
-it easier to make the jump target modification atomic. On some host
-architectures (such as x86 or PowerPC), the ``JUMP`` opcode is
-directly patched so that the block chaining has no overhead.
+Exiting from the TB this way will cause the ``cpu_exec_interrupt()``
+callback to be re-evaluated before executing additional instructions.
+It is mandatory to exit this way after any CPU state changes that may
+unmask interrupts.
+
+In order to accelerate the cases where the TB for the new
+simulated PC is already available, QEMU has mechanisms that allow
+multiple TBs to be chained directly, without having to go back to the
+main loop as described above. These mechanisms are:
+
+``lookup_and_goto_ptr``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Calling ``tcg_gen_lookup_and_goto_ptr()`` will emit a call to
+``helper_lookup_tb_ptr``. This helper will look for an existing TB that
+matches the current CPU state. If the destination TB is available, its
+code address is returned; otherwise the address of the JIT epilogue is
+returned. The call to the helper is always followed by the TCG ``goto_ptr``
+opcode, which branches to the returned address. In this way, we either
+branch to the next TB or return to the main loop.
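As a concrete illustration (an editor's sketch, not part of the patch): a target front end typically funnels an indirect branch through this helper by first storing the new guest PC into its PC global. ``cpu_pc`` and the surrounding function are assumptions of the sketch; only the ``tcg_gen_*`` calls are existing API::

    /* Hedged sketch: how a front end might emit an indirect jump.
     * "cpu_pc" is an assumed target-specific TCG global for the guest PC. */
    static void gen_indirect_jump(TCGv dest)
    {
        tcg_gen_mov_tl(cpu_pc, dest);      /* make the CPU state match the jump */
        tcg_gen_lookup_and_goto_ptr();     /* chain to the next TB if it exists,
                                              otherwise return to the main loop */
    }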
+``goto_tb + exit_tb``
+^^^^^^^^^^^^^^^^^^^^^
+
+The translation code usually implements branching by performing the
+following steps:
+
+1. Call ``tcg_gen_goto_tb()`` passing a jump slot index (either 0 or 1)
+   as a parameter.
+
+2. Emit TCG instructions to update the CPU state with any information
+   that has been assumed constant and is required by the main loop to
+   correctly locate and execute the next TB. For most guests, this is
+   just the PC of the branch destination, but others may store additional
+   data. The information updated in this step must be inferable from both
+   ``cpu_get_tb_cpu_state()`` and ``cpu_restore_state()``.
+
+3. Call ``tcg_gen_exit_tb()`` passing the address of the current TB and
+   the jump slot index again.
+
+Step 1, ``tcg_gen_goto_tb()``, will emit a ``goto_tb`` TCG
+instruction that later on gets translated to a jump to an address
+associated with the specified jump slot. Initially, this is the address
+of step 2's instructions, which update the CPU state information. Step 3,
+``tcg_gen_exit_tb()``, exits from the current TB returning a tagged
+pointer composed of the last executed TB's address and the jump slot
+index.
+
+The first time this whole sequence is executed, step 1 simply jumps
+to step 2. Then the CPU state information gets updated and we exit from
+the current TB. As a result, the behavior is very similar to the less
+optimized form described earlier in this section.
+
+Next, the main loop looks for the next TB to execute using the
+current CPU state information (creating the TB if it wasn't already
+available) and, before starting to execute the new TB's instructions,
+patches the previously executed TB by associating one of its jump
+slots (the one specified in the call to ``tcg_gen_exit_tb()``) with the
+address of the new TB.
+
+The next time this previous TB is executed and we get to that same
+``goto_tb`` step, it will already be patched (assuming the destination TB
+is still in memory) and will jump directly to the first instruction of
+the destination TB, without going back to the main loop.
+
+For the ``goto_tb + exit_tb`` mechanism to be used, the following
+conditions need to be satisfied:
+
+* The change in CPU state must be constant, e.g., a direct branch and
+  not an indirect branch.
+
+* The direct branch cannot cross a page boundary. Memory mappings
+  may change, causing the code at the destination address to change.
+
+Note that, in step 3 (``tcg_gen_exit_tb()``), in addition to the
+jump slot index, the address of the TB just executed is also returned.
+This address corresponds to the TB that will be patched; it may be
+different from the one that was directly executed from the main loop
+if the latter had already been chained to other TBs.
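An editor's sketch of the three steps above as they typically appear in a front-end branch helper. ``use_goto_tb()``, ``cpu_pc`` and the DisasContext layout are assumptions of the sketch; the ``tcg_gen_*`` calls are existing API::

    /* Hedged sketch of a front-end direct-branch helper. */
    static void gen_goto_tb(DisasContext *ctx, int slot, target_ulong dest)
    {
        if (use_goto_tb(ctx, dest)) {            /* direct branch, same guest page */
            tcg_gen_goto_tb(slot);               /* step 1: patchable jump */
            tcg_gen_movi_tl(cpu_pc, dest);       /* step 2: update assumed-constant state */
            tcg_gen_exit_tb(ctx->base.tb, slot); /* step 3: tagged pointer to main loop */
        } else {
            tcg_gen_movi_tl(cpu_pc, dest);
            tcg_gen_lookup_and_goto_ptr();       /* cross-page or otherwise unchainable */
        }
    }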
 Self-modifying code and translated code invalidation
 ----------------------------------------------------
fpu/meson.build (new file)

@@ -0,0 +1 @@
+specific_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c'))
@@ -4818,7 +4818,7 @@ float32 float32_exp2(float32 a, float_status *status)
 
     float_raise(float_flag_inexact, status);
 
-    float64_unpack_canonical(&xnp, float64_ln2, status);
+    float64_unpack_canonical(&tp, float64_ln2, status);
     xp = *parts_mul(&xp, &tp, status);
     xnp = xp;
 
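For orientation: the change above unpacks ``float64_ln2`` into ``tp``, the value actually passed to ``parts_mul``, instead of into ``xnp``, which previously left ``tp`` uninitialized. A plain-double sketch by the editor of the same kind of series evaluation (not the softfloat code itself):

    #include <math.h>
    #include <stdio.h>

    /* Sketch of the series behind float32_exp2: 2**x = sum_k (x*ln2)^k / k!.
     * "tp" plays the role of the constant ln2 factor and "xnp" the running
     * power term; the fixed bug initialized xnp from ln2 while tp, the value
     * actually multiplied, stayed uninitialized. */
    int main(void)
    {
        const double tp = 0.6931471805599453;    /* ln 2 */
        double x = 0.75;
        double xp = x * tp;                      /* x * ln2 */
        double xnp = xp;                         /* running power of xp */
        double sum = 1.0 + xp;
        double fact = 1.0;

        for (int i = 2; i < 16; i++) {
            xnp *= xp;
            fact *= i;
            sum += xnp / fact;
        }
        printf("series: %.9f  exp2: %.9f\n", sum, pow(2.0, x));
        return 0;
    }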
@@ -512,6 +512,7 @@ void sigaction_invoke(struct sigaction *action,
 #endif
 
 int qemu_madvise(void *addr, size_t len, int advice);
+int qemu_mprotect_rw(void *addr, size_t size);
 int qemu_mprotect_rwx(void *addr, size_t size);
 int qemu_mprotect_none(void *addr, size_t size);
 
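For orientation (an editor's sketch, not taken from the patch): on a POSIX host such a helper reduces to an ``mprotect()`` call with read/write protection; the function name below is hypothetical and the real helper also covers Win32:

    #include <sys/mman.h>

    /* Minimal POSIX-only sketch of an mprotect-to-RW helper. */
    static int mprotect_rw(void *addr, size_t size)
    {
        /* addr and size are assumed to be host-page aligned. */
        return mprotect(addr, size, PROT_READ | PROT_WRITE);
    }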
@@ -8,8 +8,6 @@
 #ifndef SYSEMU_TCG_H
 #define SYSEMU_TCG_H
 
-void tcg_exec_init(unsigned long tb_size, int splitwx);
-
 #ifdef CONFIG_TCG
 extern bool tcg_allowed;
 #define tcg_enabled() (tcg_allowed)
@@ -689,22 +689,12 @@ static inline bool temp_readonly(TCGTemp *ts)
     return ts->kind >= TEMP_FIXED;
 }
 
-extern TCGContext tcg_init_ctx;
 extern __thread TCGContext *tcg_ctx;
 extern const void *tcg_code_gen_epilogue;
 extern uintptr_t tcg_splitwx_diff;
 extern TCGv_env cpu_env;
 
-static inline bool in_code_gen_buffer(const void *p)
-{
-    const TCGContext *s = &tcg_init_ctx;
-    /*
-     * Much like it is valid to have a pointer to the byte past the
-     * end of an array (so long as you don't dereference it), allow
-     * a pointer to the byte past the end of the code gen buffer.
-     */
-    return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size;
-}
+bool in_code_gen_buffer(const void *p);
 
 #ifdef CONFIG_DEBUG_TCG
 const void *tcg_splitwx_to_rx(void *rw);
@@ -873,7 +863,6 @@ void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
 TranslationBlock *tcg_tb_alloc(TCGContext *s);
 
-void tcg_region_init(void);
 void tb_destroy(TranslationBlock *tb);
 void tcg_region_reset_all(void);
 
@@ -906,7 +895,7 @@ static inline void *tcg_malloc(int size)
     }
 }
 
-void tcg_context_init(TCGContext *s);
+void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
 void tcg_register_thread(void);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
@@ -1082,6 +1071,16 @@ void tcg_op_remove(TCGContext *s, TCGOp *op);
 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc);
 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
 
+/**
+ * tcg_remove_ops_after:
+ * @op: target operation
+ *
+ * Discard any opcodes emitted since @op.  Expected usage is to save
+ * a starting point with tcg_last_op(), speculatively emit opcodes,
+ * then decide whether or not to keep those opcodes after the fact.
+ */
+void tcg_remove_ops_after(TCGOp *op);
+
 void tcg_optimize(TCGContext *s);
 
 /* Allocate a new temporary and initialize it with a constant. */
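An editor's sketch of the usage pattern described in the new comment; apart from ``tcg_last_op()`` and ``tcg_remove_ops_after()``, every name here is hypothetical:

    /* Speculatively emit ops for a fast path, then drop them if the
     * fast path turns out to be unusable. */
    static void gen_maybe_fast_path(DisasContext *ctx)
    {
        TCGOp *start = tcg_last_op();      /* remember where emission stood */

        emit_fast_path_ops(ctx);           /* hypothetical speculative emission */

        if (!fast_path_is_valid(ctx)) {    /* hypothetical late check */
            tcg_remove_ops_after(start);   /* discard everything emitted above */
            emit_slow_path_ops(ctx);       /* hypothetical fallback */
        }
    }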
@@ -1096,7 +1095,8 @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
 
 /*
  * Locate or create a read-only temporary that is a constant.
- * This kind of temporary need not and should not be freed.
+ * This kind of temporary need not be freed, but for convenience
+ * will be silently ignored by tcg_temp_free_*.
  */
 TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
 
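In practice this means a front end may pass constant temporaries through code paths that unconditionally free their operands; a small editor's sketch (``dst`` and ``src`` are assumed to exist in the surrounding translator):

    /* Constant temporaries are shared and read-only. */
    TCGv_i32 one = tcg_constant_i32(1);

    tcg_gen_add_i32(dst, src, one);
    tcg_temp_free_i32(one);   /* silently ignored for a constant temp */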
@@ -868,7 +868,6 @@ int main(int argc, char **argv, char **envp)
        generating the prologue until now so that the prologue can take
        the real value of GUEST_BASE into account. */
     tcg_prologue_init(tcg_ctx);
-    tcg_region_init();
 
     target_cpu_copy_regs(env, regs);
 
meson.build

@@ -1968,16 +1968,6 @@ subdir('softmmu')
 
 common_ss.add(capstone)
 specific_ss.add(files('cpu.c', 'disas.c', 'gdbstub.c'), capstone)
-specific_ss.add(when: 'CONFIG_TCG', if_true: files(
-  'fpu/softfloat.c',
-  'tcg/optimize.c',
-  'tcg/tcg-common.c',
-  'tcg/tcg-op-gvec.c',
-  'tcg/tcg-op-vec.c',
-  'tcg/tcg-op.c',
-  'tcg/tcg.c',
-))
-specific_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tcg/tci.c'))
 
 # Work around a gcc bug/misfeature wherein constant propagation looks
 # through an alias:
@@ -2007,6 +1997,8 @@ subdir('net')
 subdir('replay')
 subdir('semihosting')
 subdir('hw')
+subdir('tcg')
+subdir('fpu')
 subdir('accel')
 subdir('plugins')
 subdir('bsd-user')
@@ -15,6 +15,7 @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE  4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #undef TCG_TARGET_STACK_GROWSUP
 
 typedef enum {
@@ -1984,7 +1984,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 static void tcg_out_epilogue(TCGContext *s);
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
-                              const TCGArg *args, const int *const_args)
+                              const TCGArg args[TCG_MAX_OP_ARGS],
+                              const int const_args[TCG_MAX_OP_ARGS])
 {
     TCGArg a0, a1, a2, a3, a4, a5;
     int c;
@@ -60,6 +60,7 @@ extern int arm_arch;
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define MAX_CODE_GEN_BUFFER_SIZE  UINT32_MAX
 
 typedef enum {
     TCG_REG_R0 = 0,
@@ -31,9 +31,11 @@
 #ifdef __x86_64__
 # define TCG_TARGET_REG_BITS  64
 # define TCG_TARGET_NB_REGS   32
+# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #else
 # define TCG_TARGET_REG_BITS  32
 # define TCG_TARGET_NB_REGS   24
+# define MAX_CODE_GEN_BUFFER_SIZE  UINT32_MAX
 #endif
 
 typedef enum {
tcg/meson.build (new file)

@@ -0,0 +1,14 @@
+tcg_ss = ss.source_set()
+
+tcg_ss.add(files(
+  'optimize.c',
+  'region.c',
+  'tcg.c',
+  'tcg-common.c',
+  'tcg-op.c',
+  'tcg-op-gvec.c',
+  'tcg-op-vec.c',
+))
+tcg_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tci.c'))
+
+specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
@@ -39,6 +39,12 @@
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 #define TCG_TARGET_NB_REGS 32
 
+/*
+ * We have a 256MB branch region, but leave room to make sure the
+ * main executable is also within that region.
+ */
+#define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
+
 typedef enum {
     TCG_REG_ZERO = 0,
     TCG_REG_AT,
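The 128 MiB cap relates to the MIPS-only ``cross_256mb()`` test used by the buffer allocation code: J and JAL can only reach targets inside the same 256 MiB-aligned window. A standalone editor's sketch of that check, using the same XOR/mask arithmetic wrapped in a tiny test harness:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* True if [addr, addr + size) straddles a 256 MiB-aligned boundary:
     * the XOR keeps only the bits in which the two ends differ, and masking
     * off the low 28 bits leaves something non-zero only when the ends fall
     * in different 256 MiB windows. */
    static bool cross_256mb(uintptr_t addr, size_t size)
    {
        return (addr ^ (addr + size)) & ~(uintptr_t)0x0ffffffful;
    }

    int main(void)
    {
        printf("%d\n", cross_256mb(0x0ff00000, 0x00200000));  /* 1: crosses 0x10000000 */
        printf("%d\n", cross_256mb(0x10000000, 0x00200000));  /* 0: stays inside */
        return 0;
    }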
@@ -27,8 +27,10 @@
 
 #ifdef _ARCH_PPC64
 # define TCG_TARGET_REG_BITS  64
+# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
 #else
 # define TCG_TARGET_REG_BITS  32
+# define MAX_CODE_GEN_BUFFER_SIZE  (32 * MiB)
 #endif
 
 #define TCG_TARGET_NB_REGS 64
tcg/region.c (new file, 999 lines)
@@ -0,0 +1,999 @@
+/*
+ * Memory region management for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+#include "tcg-internal.h"
+
+
+struct tcg_region_tree {
+    QemuMutex lock;
+    GTree *tree;
+    /* padding to avoid false sharing is computed at run-time */
+};
+
+/*
+ * We divide code_gen_buffer into equally-sized "regions" that TCG threads
+ * dynamically allocate from as demand dictates. Given appropriate region
+ * sizing, this minimizes flushes even when some TCG threads generate a lot
+ * more code than others.
+ */
+struct tcg_region_state {
+    QemuMutex lock;
+
+    /* fields set at init time */
+    void *start_aligned;
+    void *after_prologue;
+    size_t n;
+    size_t size;       /* size of one region */
+    size_t stride;     /* .size + guard size */
+    size_t total_size; /* size of entire buffer, >= n * stride */
+
+    /* fields protected by the lock */
+    size_t current;       /* current region index */
+    size_t agg_size_full; /* aggregate size of full regions */
+};
+
+static struct tcg_region_state region;
+
+/*
+ * This is an array of struct tcg_region_tree's, with padding.
+ * We use void * to simplify the computation of region_trees[i]; each
+ * struct is found every tree_size bytes.
+ */
+static void *region_trees;
+static size_t tree_size;
+
+bool in_code_gen_buffer(const void *p)
+{
+    /*
+     * Much like it is valid to have a pointer to the byte past the
+     * end of an array (so long as you don't dereference it), allow
+     * a pointer to the byte past the end of the code gen buffer.
+     */
+    return (size_t)(p - region.start_aligned) <= region.total_size;
+}
+
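To make the layout described by ``struct tcg_region_state`` concrete, an editor's toy model (all sizes invented, not taken from the patch): region ``i`` starts at ``start_aligned + i * stride`` and exposes ``size`` usable bytes, with ``stride - size`` left over for the guard page:

    #include <stddef.h>
    #include <stdio.h>

    /* Toy model of the region partitioning, using made-up sizes:
     * a 32 MiB buffer, 8 regions, one 4 KiB guard page per region. */
    int main(void)
    {
        const size_t total_size = 32u << 20;
        const size_t guard      = 4096;
        const size_t n          = 8;
        const size_t stride     = total_size / n;   /* per-region footprint */
        const size_t size       = stride - guard;   /* usable bytes per region */

        for (size_t i = 0; i < n; i++) {
            printf("region %zu: offset %#zx, %zu usable bytes\n",
                   i, i * stride, size);
        }
        return 0;
    }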
#ifdef CONFIG_DEBUG_TCG
|
||||
const void *tcg_splitwx_to_rx(void *rw)
|
||||
{
|
||||
/* Pass NULL pointers unchanged. */
|
||||
if (rw) {
|
||||
g_assert(in_code_gen_buffer(rw));
|
||||
rw += tcg_splitwx_diff;
|
||||
}
|
||||
return rw;
|
||||
}
|
||||
|
||||
void *tcg_splitwx_to_rw(const void *rx)
|
||||
{
|
||||
/* Pass NULL pointers unchanged. */
|
||||
if (rx) {
|
||||
rx -= tcg_splitwx_diff;
|
||||
/* Assert that we end with a pointer in the rw region. */
|
||||
g_assert(in_code_gen_buffer(rx));
|
||||
}
|
||||
return (void *)rx;
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_TCG */
|
||||
|
||||
/* compare a pointer @ptr and a tb_tc @s */
|
||||
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
|
||||
{
|
||||
if (ptr >= s->ptr + s->size) {
|
||||
return 1;
|
||||
} else if (ptr < s->ptr) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
|
||||
{
|
||||
const struct tb_tc *a = ap;
|
||||
const struct tb_tc *b = bp;
|
||||
|
||||
/*
|
||||
* When both sizes are set, we know this isn't a lookup.
|
||||
* This is the most likely case: every TB must be inserted; lookups
|
||||
* are a lot less frequent.
|
||||
*/
|
||||
if (likely(a->size && b->size)) {
|
||||
if (a->ptr > b->ptr) {
|
||||
return 1;
|
||||
} else if (a->ptr < b->ptr) {
|
||||
return -1;
|
||||
}
|
||||
/* a->ptr == b->ptr should happen only on deletions */
|
||||
g_assert(a->size == b->size);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* All lookups have either .size field set to 0.
|
||||
* From the glib sources we see that @ap is always the lookup key. However
|
||||
* the docs provide no guarantee, so we just mark this case as likely.
|
||||
*/
|
||||
if (likely(a->size == 0)) {
|
||||
return ptr_cmp_tb_tc(a->ptr, b);
|
||||
}
|
||||
return ptr_cmp_tb_tc(b->ptr, a);
|
||||
}
|
||||
|
||||
static void tcg_region_trees_init(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
|
||||
region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_init(&rt->lock);
|
||||
rt->tree = g_tree_new(tb_tc_cmp);
|
||||
}
|
||||
}
|
||||
|
||||
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
|
||||
{
|
||||
size_t region_idx;
|
||||
|
||||
/*
|
||||
* Like tcg_splitwx_to_rw, with no assert. The pc may come from
|
||||
* a signal handler over which the caller has no control.
|
||||
*/
|
||||
if (!in_code_gen_buffer(p)) {
|
||||
p -= tcg_splitwx_diff;
|
||||
if (!in_code_gen_buffer(p)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (p < region.start_aligned) {
|
||||
region_idx = 0;
|
||||
} else {
|
||||
ptrdiff_t offset = p - region.start_aligned;
|
||||
|
||||
if (offset > region.stride * (region.n - 1)) {
|
||||
region_idx = region.n - 1;
|
||||
} else {
|
||||
region_idx = offset / region.stride;
|
||||
}
|
||||
}
|
||||
return region_trees + region_idx * tree_size;
|
||||
}
|
||||
|
||||
void tcg_tb_insert(TranslationBlock *tb)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
||||
|
||||
g_assert(rt != NULL);
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
g_tree_insert(rt->tree, &tb->tc, tb);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
|
||||
void tcg_tb_remove(TranslationBlock *tb)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
||||
|
||||
g_assert(rt != NULL);
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
g_tree_remove(rt->tree, &tb->tc);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the TB 'tb' such that
|
||||
* tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
|
||||
* Return NULL if not found.
|
||||
*/
|
||||
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
|
||||
TranslationBlock *tb;
|
||||
struct tb_tc s = { .ptr = (void *)tc_ptr };
|
||||
|
||||
if (rt == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
tb = g_tree_lookup(rt->tree, &s);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
return tb;
|
||||
}
|
||||
|
||||
static void tcg_region_tree_lock_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_region_tree_unlock_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
g_tree_foreach(rt->tree, func, user_data);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
}
|
||||
|
||||
size_t tcg_nb_tbs(void)
|
||||
{
|
||||
size_t nb_tbs = 0;
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
nb_tbs += g_tree_nnodes(rt->tree);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
return nb_tbs;
|
||||
}
|
||||
|
||||
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
|
||||
{
|
||||
TranslationBlock *tb = v;
|
||||
|
||||
tb_destroy(tb);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void tcg_region_tree_reset_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
|
||||
/* Increment the refcount first so that destroy acts as a reset */
|
||||
g_tree_ref(rt->tree);
|
||||
g_tree_destroy(rt->tree);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
}
|
||||
|
||||
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
|
||||
{
|
||||
void *start, *end;
|
||||
|
||||
start = region.start_aligned + curr_region * region.stride;
|
||||
end = start + region.size;
|
||||
|
||||
if (curr_region == 0) {
|
||||
start = region.after_prologue;
|
||||
}
|
||||
/* The final region may have a few extra pages due to earlier rounding. */
|
||||
if (curr_region == region.n - 1) {
|
||||
end = region.start_aligned + region.total_size;
|
||||
}
|
||||
|
||||
*pstart = start;
|
||||
*pend = end;
|
||||
}
|
||||
|
||||
static void tcg_region_assign(TCGContext *s, size_t curr_region)
|
||||
{
|
||||
void *start, *end;
|
||||
|
||||
tcg_region_bounds(curr_region, &start, &end);
|
||||
|
||||
s->code_gen_buffer = start;
|
||||
s->code_gen_ptr = start;
|
||||
s->code_gen_buffer_size = end - start;
|
||||
s->code_gen_highwater = end - TCG_HIGHWATER;
|
||||
}
|
||||
|
||||
static bool tcg_region_alloc__locked(TCGContext *s)
|
||||
{
|
||||
if (region.current == region.n) {
|
||||
return true;
|
||||
}
|
||||
tcg_region_assign(s, region.current);
|
||||
region.current++;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Request a new region once the one in use has filled up.
|
||||
* Returns true on error.
|
||||
*/
|
||||
bool tcg_region_alloc(TCGContext *s)
|
||||
{
|
||||
bool err;
|
||||
/* read the region size now; alloc__locked will overwrite it on success */
|
||||
size_t size_full = s->code_gen_buffer_size;
|
||||
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
err = tcg_region_alloc__locked(s);
|
||||
if (!err) {
|
||||
region.agg_size_full += size_full - TCG_HIGHWATER;
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a context's first region allocation.
|
||||
* This function does _not_ increment region.agg_size_full.
|
||||
*/
|
||||
static void tcg_region_initial_alloc__locked(TCGContext *s)
|
||||
{
|
||||
bool err = tcg_region_alloc__locked(s);
|
||||
g_assert(!err);
|
||||
}
|
||||
|
||||
void tcg_region_initial_alloc(TCGContext *s)
|
||||
{
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
tcg_region_initial_alloc__locked(s);
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
}
|
||||
|
||||
/* Call from a safe-work context */
|
||||
void tcg_region_reset_all(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||
unsigned int i;
|
||||
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
region.current = 0;
|
||||
region.agg_size_full = 0;
|
||||
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||
tcg_region_initial_alloc__locked(s);
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
|
||||
tcg_region_tree_reset_all();
|
||||
}
|
||||
|
||||
static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
|
||||
{
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
return 1;
|
||||
#else
|
||||
size_t n_regions;
|
||||
|
||||
/*
|
||||
* It is likely that some vCPUs will translate more code than others,
|
||||
* so we first try to set more regions than max_cpus, with those regions
|
||||
* being of reasonable size. If that's not possible we make do by evenly
|
||||
* dividing the code_gen_buffer among the vCPUs.
|
||||
*/
|
||||
/* Use a single region if all we have is one vCPU thread */
|
||||
if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to have more regions than max_cpus, with each region being >= 2 MB.
|
||||
* If we can't, then just allocate one region per vCPU thread.
|
||||
*/
|
||||
n_regions = tb_size / (2 * MiB);
|
||||
if (n_regions <= max_cpus) {
|
||||
return max_cpus;
|
||||
}
|
||||
return MIN(n_regions, max_cpus * 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Minimum size of the code gen buffer. This number is randomly chosen,
|
||||
* but not so small that we can't have a fair number of TB's live.
|
||||
*
|
||||
* Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
|
||||
* Unless otherwise indicated, this is constrained by the range of
|
||||
* direct branches on the host cpu, as used by the TCG implementation
|
||||
* of goto_tb.
|
||||
*/
|
||||
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 32
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
/*
|
||||
* For user mode on smaller 32 bit systems we may run into trouble
|
||||
* allocating big chunks of data in the right place. On these systems
|
||||
* we utilise a static code generation buffer directly in the binary.
|
||||
*/
|
||||
#define USE_STATIC_CODE_GEN_BUFFER
|
||||
#endif
|
||||
#else /* TCG_TARGET_REG_BITS == 64 */
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
/*
|
||||
* As user-mode emulation typically means running multiple instances
|
||||
* of the translator don't go too nuts with our default code gen
|
||||
* buffer lest we make things too hard for the OS.
|
||||
*/
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
|
||||
#else
|
||||
/*
|
||||
* We expect most system emulation to run one or two guests per host.
|
||||
* Users running large scale system emulation may want to tweak their
|
||||
* runtime setup via the tb-size control on the command line.
|
||||
*/
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
|
||||
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
|
||||
? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
|
||||
|
||||
#ifdef __mips__
|
||||
/*
|
||||
* In order to use J and JAL within the code_gen_buffer, we require
|
||||
* that the buffer not cross a 256MB boundary.
|
||||
*/
|
||||
static inline bool cross_256mb(void *addr, size_t size)
|
||||
{
|
||||
return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
|
||||
}
|
||||
|
||||
/*
|
||||
* We weren't able to allocate a buffer without crossing that boundary,
|
||||
* so make do with the larger portion of the buffer that doesn't cross.
|
||||
* Returns the new base and size of the buffer in *obuf and *osize.
|
||||
*/
|
||||
static inline void split_cross_256mb(void **obuf, size_t *osize,
|
||||
void *buf1, size_t size1)
|
||||
{
|
||||
void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
|
||||
size_t size2 = buf1 + size1 - buf2;
|
||||
|
||||
size1 = buf2 - buf1;
|
||||
if (size1 < size2) {
|
||||
size1 = size2;
|
||||
buf1 = buf2;
|
||||
}
|
||||
|
||||
*obuf = buf1;
|
||||
*osize = size1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_STATIC_CODE_GEN_BUFFER
|
||||
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
|
||||
__attribute__((aligned(CODE_GEN_ALIGN)));
|
||||
|
||||
static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
|
||||
{
|
||||
void *buf, *end;
|
||||
size_t size;
|
||||
|
||||
if (splitwx > 0) {
|
||||
error_setg(errp, "jit split-wx not supported");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* page-align the beginning and end of the buffer */
|
||||
buf = static_code_gen_buffer;
|
||||
end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
|
||||
buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
|
||||
end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
|
||||
|
||||
size = end - buf;
|
||||
|
||||
/* Honor a command-line option limiting the size of the buffer. */
|
||||
if (size > tb_size) {
|
||||
size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
|
||||
}
|
||||
|
||||
#ifdef __mips__
|
||||
if (cross_256mb(buf, size)) {
|
||||
split_cross_256mb(&buf, &size, buf, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
region.start_aligned = buf;
|
||||
region.total_size = size;
|
||||
|
||||
return PROT_READ | PROT_WRITE;
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
|
||||
{
|
||||
void *buf;
|
||||
|
||||
if (splitwx > 0) {
|
||||
error_setg(errp, "jit split-wx not supported");
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
|
||||
PAGE_EXECUTE_READWRITE);
|
||||
if (buf == NULL) {
|
||||
error_setg_win32(errp, GetLastError(),
|
||||
"allocate %zu bytes for jit buffer", size);
|
||||
return false;
|
||||
}
|
||||
|
||||
region.start_aligned = buf;
|
||||
region.total_size = size;
|
||||
|
||||
return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
|
||||
}
|
||||
#else
|
||||
static int alloc_code_gen_buffer_anon(size_t size, int prot,
|
||||
int flags, Error **errp)
|
||||
{
|
||||
void *buf;
|
||||
|
||||
buf = mmap(NULL, size, prot, flags, -1, 0);
|
||||
if (buf == MAP_FAILED) {
|
||||
error_setg_errno(errp, errno,
|
||||
"allocate %zu bytes for jit buffer", size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef __mips__
|
||||
if (cross_256mb(buf, size)) {
|
||||
/*
|
||||
* Try again, with the original still mapped, to avoid re-acquiring
|
||||
* the same 256mb crossing.
|
||||
*/
|
||||
size_t size2;
|
||||
void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
|
||||
switch ((int)(buf2 != MAP_FAILED)) {
|
||||
case 1:
|
||||
if (!cross_256mb(buf2, size)) {
|
||||
/* Success! Use the new buffer. */
|
||||
munmap(buf, size);
|
||||
break;
|
||||
}
|
||||
/* Failure. Work with what we had. */
|
||||
munmap(buf2, size);
|
||||
/* fallthru */
|
||||
default:
|
||||
/* Split the original buffer. Free the smaller half. */
|
||||
split_cross_256mb(&buf2, &size2, buf, size);
|
||||
if (buf == buf2) {
|
||||
munmap(buf + size2, size - size2);
|
||||
} else {
|
||||
munmap(buf, size - size2);
|
||||
}
|
||||
size = size2;
|
||||
break;
|
||||
}
|
||||
buf = buf2;
|
||||
}
|
||||
#endif
|
||||
|
||||
region.start_aligned = buf;
|
||||
region.total_size = size;
|
||||
return prot;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
#ifdef CONFIG_POSIX
|
||||
#include "qemu/memfd.h"
|
||||
|
||||
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
|
||||
{
|
||||
void *buf_rw = NULL, *buf_rx = MAP_FAILED;
|
||||
int fd = -1;
|
||||
|
||||
#ifdef __mips__
|
||||
/* Find space for the RX mapping, vs the 256MiB regions. */
|
||||
if (alloc_code_gen_buffer_anon(size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS |
|
||||
MAP_NORESERVE, errp) < 0) {
|
||||
return false;
|
||||
}
|
||||
/* The size of the mapping may have been adjusted. */
|
||||
buf_rx = region.start_aligned;
|
||||
size = region.total_size;
|
||||
#endif
|
||||
|
||||
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
|
||||
if (buf_rw == NULL) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
#ifdef __mips__
|
||||
void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
|
||||
MAP_SHARED | MAP_FIXED, fd, 0);
|
||||
if (tmp != buf_rx) {
|
||||
goto fail_rx;
|
||||
}
|
||||
#else
|
||||
buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
|
||||
if (buf_rx == MAP_FAILED) {
|
||||
goto fail_rx;
|
||||
}
|
||||
#endif
|
||||
|
||||
close(fd);
|
||||
region.start_aligned = buf_rw;
|
||||
region.total_size = size;
|
||||
tcg_splitwx_diff = buf_rx - buf_rw;
|
||||
|
||||
return PROT_READ | PROT_WRITE;
|
||||
|
||||
fail_rx:
|
||||
error_setg_errno(errp, errno, "failed to map shared memory for execute");
|
||||
fail:
|
||||
if (buf_rx != MAP_FAILED) {
|
||||
munmap(buf_rx, size);
|
||||
}
|
||||
if (buf_rw) {
|
||||
munmap(buf_rw, size);
|
||||
}
|
||||
if (fd >= 0) {
|
||||
close(fd);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
#endif /* CONFIG_POSIX */
|
||||
|
||||
#ifdef CONFIG_DARWIN
|
||||
#include <mach/mach.h>
|
||||
|
||||
extern kern_return_t mach_vm_remap(vm_map_t target_task,
|
||||
mach_vm_address_t *target_address,
|
||||
mach_vm_size_t size,
|
||||
mach_vm_offset_t mask,
|
||||
int flags,
|
||||
vm_map_t src_task,
|
||||
mach_vm_address_t src_address,
|
||||
boolean_t copy,
|
||||
vm_prot_t *cur_protection,
|
||||
vm_prot_t *max_protection,
|
||||
vm_inherit_t inheritance);
|
||||
|
||||
static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
|
||||
{
|
||||
kern_return_t ret;
|
||||
mach_vm_address_t buf_rw, buf_rx;
|
||||
vm_prot_t cur_prot, max_prot;
|
||||
|
||||
/* Map the read-write portion via normal anon memory. */
|
||||
if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf_rw = (mach_vm_address_t)region.start_aligned;
|
||||
buf_rx = 0;
|
||||
ret = mach_vm_remap(mach_task_self(),
|
||||
&buf_rx,
|
||||
size,
|
||||
0,
|
||||
VM_FLAGS_ANYWHERE,
|
||||
mach_task_self(),
|
||||
buf_rw,
|
||||
false,
|
||||
&cur_prot,
|
||||
&max_prot,
|
||||
VM_INHERIT_NONE);
|
||||
if (ret != KERN_SUCCESS) {
|
||||
/* TODO: Convert "ret" to a human readable error message. */
|
||||
error_setg(errp, "vm_remap for jit splitwx failed");
|
||||
munmap((void *)buf_rw, size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
|
||||
error_setg_errno(errp, errno, "mprotect for jit splitwx");
|
||||
munmap((void *)buf_rx, size);
|
||||
munmap((void *)buf_rw, size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
tcg_splitwx_diff = buf_rx - buf_rw;
|
||||
return PROT_READ | PROT_WRITE;
|
||||
}
|
||||
#endif /* CONFIG_DARWIN */
|
||||
#endif /* CONFIG_TCG_INTERPRETER */
|
||||
|
||||
static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
|
||||
{
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
# ifdef CONFIG_DARWIN
|
||||
return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
|
||||
# endif
|
||||
# ifdef CONFIG_POSIX
|
||||
return alloc_code_gen_buffer_splitwx_memfd(size, errp);
|
||||
# endif
|
||||
#endif
|
||||
error_setg(errp, "jit split-wx not supported");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
|
||||
{
|
||||
ERRP_GUARD();
|
||||
int prot, flags;
|
||||
|
||||
if (splitwx) {
|
||||
prot = alloc_code_gen_buffer_splitwx(size, errp);
|
||||
if (prot >= 0) {
|
||||
return prot;
|
||||
}
|
||||
/*
|
||||
* If splitwx force-on (1), fail;
|
||||
* if splitwx default-on (-1), fall through to splitwx off.
|
||||
*/
|
||||
if (splitwx > 0) {
|
||||
return -1;
|
||||
}
|
||||
error_free_or_abort(errp);
|
||||
}
|
||||
|
||||
/*
|
||||
* macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
|
||||
* rejects a permission change from RWX -> NONE when reserving the
|
||||
* guard pages later. We can go the other way with the same number
|
||||
* of syscalls, so always begin with PROT_NONE.
|
||||
*/
|
||||
prot = PROT_NONE;
|
||||
flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
#ifdef CONFIG_DARWIN
|
||||
/* Applicable to both iOS and macOS (Apple Silicon). */
|
||||
if (!splitwx) {
|
||||
flags |= MAP_JIT;
|
||||
}
|
||||
#endif
|
||||
|
||||
return alloc_code_gen_buffer_anon(size, prot, flags, errp);
|
||||
}
|
||||
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
|
||||
|
||||
/*
|
||||
* Initializes region partitioning.
|
||||
*
|
||||
* Called at init time from the parent thread (i.e. the one calling
|
||||
* tcg_context_init), after the target's TCG globals have been set.
|
||||
*
|
||||
* Region partitioning works by splitting code_gen_buffer into separate regions,
|
||||
* and then assigning regions to TCG threads so that the threads can translate
|
||||
* code in parallel without synchronization.
|
||||
*
|
||||
* In softmmu the number of TCG threads is bounded by max_cpus, so we use at
|
||||
* least max_cpus regions in MTTCG. In !MTTCG we use a single region.
|
||||
* Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
|
||||
* must have been parsed before calling this function, since it calls
|
||||
* qemu_tcg_mttcg_enabled().
|
||||
*
|
||||
* In user-mode we use a single region. Having multiple regions in user-mode
|
||||
* is not supported, because the number of vCPU threads (recall that each thread
|
||||
* spawned by the guest corresponds to a vCPU thread) is only bounded by the
|
||||
* OS, and usually this number is huge (tens of thousands is not uncommon).
|
||||
* Thus, given this large bound on the number of vCPU threads and the fact
|
||||
* that code_gen_buffer is allocated at compile-time, we cannot guarantee
|
||||
* the availability of at least one region per vCPU thread.
|
||||
*
|
||||
* However, this user-mode limitation is unlikely to be a significant problem
|
||||
* in practice. Multi-threaded guests share most if not all of their translated
|
||||
* code, which makes parallel code generation less appealing than in softmmu.
|
||||
*/
|
||||
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
|
||||
{
|
||||
const size_t page_size = qemu_real_host_page_size;
|
||||
size_t region_size;
|
||||
int have_prot, need_prot;
|
||||
|
||||
/* Size the buffer. */
|
||||
if (tb_size == 0) {
|
||||
size_t phys_mem = qemu_get_host_physmem();
|
||||
if (phys_mem == 0) {
|
||||
tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
|
||||
} else {
|
||||
tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
|
||||
tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
|
||||
}
|
||||
}
|
||||
if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
|
||||
tb_size = MIN_CODE_GEN_BUFFER_SIZE;
|
||||
}
|
||||
if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
|
||||
tb_size = MAX_CODE_GEN_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
|
||||
assert(have_prot >= 0);
|
||||
|
||||
/* Request large pages for the buffer and the splitwx. */
|
||||
qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
|
||||
if (tcg_splitwx_diff) {
|
||||
qemu_madvise(region.start_aligned + tcg_splitwx_diff,
|
||||
region.total_size, QEMU_MADV_HUGEPAGE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make region_size a multiple of page_size, using aligned as the start.
|
||||
* As a result of this we might end up with a few extra pages at the end of
|
||||
* the buffer; we will assign those to the last region.
|
||||
*/
|
||||
region.n = tcg_n_regions(tb_size, max_cpus);
|
||||
region_size = tb_size / region.n;
|
||||
region_size = QEMU_ALIGN_DOWN(region_size, page_size);
|
||||
|
||||
/* A region must have at least 2 pages; one code, one guard */
|
||||
g_assert(region_size >= 2 * page_size);
|
||||
region.stride = region_size;
|
||||
|
||||
/* Reserve space for guard pages. */
|
||||
region.size = region_size - page_size;
|
||||
region.total_size -= page_size;
|
||||
|
||||
/*
|
||||
* The first region will be smaller than the others, via the prologue,
|
||||
* which has yet to be allocated. For now, the first region begins at
|
||||
* the page boundary.
|
||||
*/
|
||||
region.after_prologue = region.start_aligned;
|
||||
|
||||
/* init the region struct */
|
||||
qemu_mutex_init(®ion.lock);
|
||||
|
||||
/*
|
||||
* Set guard pages in the rw buffer, as that's the one into which
|
||||
* buffer overruns could occur. Do not set guard pages in the rx
|
||||
* buffer -- let that one use hugepages throughout.
|
||||
* Work with the page protections set up with the initial mapping.
|
||||
*/
|
||||
need_prot = PAGE_READ | PAGE_WRITE;
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
if (tcg_splitwx_diff == 0) {
|
||||
need_prot |= PAGE_EXEC;
|
||||
}
|
||||
#endif
|
||||
for (size_t i = 0, n = region.n; i < n; i++) {
|
||||
void *start, *end;
|
||||
|
||||
tcg_region_bounds(i, &start, &end);
|
||||
if (have_prot != need_prot) {
|
||||
int rc;
|
||||
|
||||
if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
|
||||
rc = qemu_mprotect_rwx(start, end - start);
|
||||
} else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
|
||||
rc = qemu_mprotect_rw(start, end - start);
|
||||
} else {
|
||||
g_assert_not_reached();
|
||||
}
|
||||
if (rc) {
|
||||
error_setg_errno(&error_fatal, errno,
|
||||
"mprotect of jit buffer");
|
||||
}
|
||||
}
|
||||
if (have_prot != 0) {
|
||||
/* Guard pages are nice for bug detection but are not essential. */
|
||||
(void)qemu_mprotect_none(end, page_size);
|
||||
}
|
||||
}
|
||||
|
||||
tcg_region_trees_init();
|
||||
|
||||
/*
|
||||
* Leave the initial context initialized to the first region.
|
||||
* This will be the context into which we generate the prologue.
|
||||
* It is also the only context for CONFIG_USER_ONLY.
|
||||
*/
|
||||
tcg_region_initial_alloc__locked(&tcg_init_ctx);
|
||||
}
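To make the sizing arithmetic above concrete, here is a small sketch with made-up numbers (not taken from QEMU): a 128 MiB buffer split for 3 vCPUs with 8 regions each. The real function additionally deducts the prologue from the first region and hands any leftover pages to the last one.

#include <stdio.h>

#define PAGE_SIZE         4096u
#define ALIGN_DOWN(x, a)  ((x) / (a) * (a))

int main(void)
{
    size_t tb_size = 128u * 1024 * 1024;   /* hypothetical buffer size */
    size_t n       = 24;                   /* e.g. 3 vCPUs * 8 regions each */
    size_t stride  = ALIGN_DOWN(tb_size / n, PAGE_SIZE);
    size_t usable  = stride - PAGE_SIZE;   /* last page of each region is a guard */

    printf("stride=%zu usable=%zu leftover=%zu\n",
           stride, usable, tb_size - n * stride);
    return 0;
}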
|
||||
|
||||
void tcg_region_prologue_set(TCGContext *s)
|
||||
{
|
||||
/* Deduct the prologue from the first region. */
|
||||
g_assert(region.start_aligned == s->code_gen_buffer);
|
||||
region.after_prologue = s->code_ptr;
|
||||
|
||||
/* Recompute boundaries of the first region. */
|
||||
tcg_region_assign(s, 0);
|
||||
|
||||
/* Register the balance of the buffer with gdb. */
|
||||
tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
|
||||
region.start_aligned + region.total_size -
|
||||
region.after_prologue);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the size (in bytes) of all translated code (i.e. from all regions)
|
||||
* currently in the cache.
|
||||
* See also: tcg_code_capacity()
|
||||
* Do not confuse with tcg_current_code_size(); that one applies to a single
|
||||
* TCG context.
|
||||
*/
|
||||
size_t tcg_code_size(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||
unsigned int i;
|
||||
size_t total;
|
||||
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
total = region.agg_size_full;
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||
size_t size;
|
||||
|
||||
size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
|
||||
g_assert(size <= s->code_gen_buffer_size);
|
||||
total += size;
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
return total;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the code capacity (in bytes) of the entire cache, i.e. including all
|
||||
* regions.
|
||||
* See also: tcg_code_size()
|
||||
*/
|
||||
size_t tcg_code_capacity(void)
|
||||
{
|
||||
size_t guard_size, capacity;
|
||||
|
||||
/* no need for synchronization; these variables are set at init time */
|
||||
guard_size = region.stride - region.size;
|
||||
capacity = region.total_size;
|
||||
capacity -= (region.n - 1) * guard_size;
|
||||
capacity -= region.n * TCG_HIGHWATER;
|
||||
|
||||
return capacity;
|
||||
}
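With the same hypothetical numbers as in the sketch after tcg_region_init (24 regions, 4 KiB pages, a TCG_HIGHWATER of 1024 bytes), the capacity formula above works out as follows; the exact figure obviously depends on the real buffer and page sizes.

#include <stdio.h>

int main(void)
{
    size_t total = 128u * 1024 * 1024 - 4096;    /* buffer minus the final guard page */
    size_t n = 24, guard = 4096, highwater = 1024;
    size_t capacity = total - (n - 1) * guard - n * highwater;

    printf("capacity = %zu bytes\n", capacity);  /* 134094848, i.e. ~127.9 MiB */
    return 0;
}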
|
||||
|
||||
size_t tcg_tb_phys_invalidate_count(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||
unsigned int i;
|
||||
size_t total = 0;
|
||||
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||
|
||||
total += qatomic_read(&s->tb_phys_invalidate_count);
|
||||
}
|
||||
return total;
|
||||
}
|
|
@ -34,6 +34,7 @@
|
|||
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
|
||||
#define TCG_TARGET_NB_REGS 32
|
||||
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
|
||||
|
||||
typedef enum {
|
||||
TCG_REG_ZERO,
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
#define TCG_TARGET_INSN_UNIT_SIZE 2
|
||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
|
||||
|
||||
/* We have a +- 4GB range on the branches; leave some slop. */
|
||||
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
|
||||
|
||||
typedef enum TCGReg {
|
||||
TCG_REG_R0 = 0,
|
||||
TCG_REG_R1,
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#define TCG_TARGET_INSN_UNIT_SIZE 4
|
||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
|
||||
#define TCG_TARGET_NB_REGS 32
|
||||
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
|
||||
|
||||
typedef enum {
|
||||
TCG_REG_G0 = 0,
|
||||
|
|
40
tcg/tcg-internal.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Internal declarations for Tiny Code Generator for QEMU
|
||||
*
|
||||
* Copyright (c) 2008 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef TCG_INTERNAL_H
|
||||
#define TCG_INTERNAL_H 1
|
||||
|
||||
#define TCG_HIGHWATER 1024
|
||||
|
||||
extern TCGContext tcg_init_ctx;
|
||||
extern TCGContext **tcg_ctxs;
|
||||
extern unsigned int tcg_cur_ctxs;
|
||||
extern unsigned int tcg_max_ctxs;
|
||||
|
||||
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus);
|
||||
bool tcg_region_alloc(TCGContext *s);
|
||||
void tcg_region_initial_alloc(TCGContext *s);
|
||||
void tcg_region_prologue_set(TCGContext *s);
|
||||
|
||||
#endif /* TCG_INTERNAL_H */
|
649
tcg/tcg.c
|
@ -43,11 +43,6 @@
|
|||
#define NO_CPU_IO_DEFS
|
||||
|
||||
#include "exec/exec-all.h"
|
||||
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
#include "hw/boards.h"
|
||||
#endif
|
||||
|
||||
#include "tcg/tcg-op.h"
|
||||
|
||||
#if UINTPTR_MAX == UINT32_MAX
|
||||
|
@ -63,6 +58,7 @@
|
|||
|
||||
#include "elf.h"
|
||||
#include "exec/log.h"
|
||||
#include "tcg-internal.h"
|
||||
|
||||
/* Forward declarations for functions declared in tcg-target.c.inc and
|
||||
used here. */
|
||||
|
@ -153,10 +149,12 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
|
|||
static int tcg_out_ldst_finalize(TCGContext *s);
|
||||
#endif
|
||||
|
||||
#define TCG_HIGHWATER 1024
|
||||
TCGContext tcg_init_ctx;
|
||||
__thread TCGContext *tcg_ctx;
|
||||
|
||||
static TCGContext **tcg_ctxs;
|
||||
static unsigned int n_tcg_ctxs;
|
||||
TCGContext **tcg_ctxs;
|
||||
unsigned int tcg_cur_ctxs;
|
||||
unsigned int tcg_max_ctxs;
|
||||
TCGv_env cpu_env = 0;
|
||||
const void *tcg_code_gen_epilogue;
|
||||
uintptr_t tcg_splitwx_diff;
|
||||
|
@ -165,42 +163,6 @@ uintptr_t tcg_splitwx_diff;
|
|||
tcg_prologue_fn *tcg_qemu_tb_exec;
|
||||
#endif
|
||||
|
||||
struct tcg_region_tree {
|
||||
QemuMutex lock;
|
||||
GTree *tree;
|
||||
/* padding to avoid false sharing is computed at run-time */
|
||||
};
|
||||
|
||||
/*
|
||||
* We divide code_gen_buffer into equally-sized "regions" that TCG threads
|
||||
* dynamically allocate from as demand dictates. Given appropriate region
|
||||
* sizing, this minimizes flushes even when some TCG threads generate a lot
|
||||
* more code than others.
|
||||
*/
|
||||
struct tcg_region_state {
|
||||
QemuMutex lock;
|
||||
|
||||
/* fields set at init time */
|
||||
void *start;
|
||||
void *start_aligned;
|
||||
void *end;
|
||||
size_t n;
|
||||
size_t size; /* size of one region */
|
||||
size_t stride; /* .size + guard size */
|
||||
|
||||
/* fields protected by the lock */
|
||||
size_t current; /* current region index */
|
||||
size_t agg_size_full; /* aggregate size of full regions */
|
||||
};
|
||||
|
||||
static struct tcg_region_state region;
|
||||
/*
|
||||
* This is an array of struct tcg_region_tree's, with padding.
|
||||
* We use void * to simplify the computation of region_trees[i]; each
|
||||
* struct is found every tree_size bytes.
|
||||
*/
|
||||
static void *region_trees;
|
||||
static size_t tree_size;
|
||||
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
|
||||
static TCGRegSet tcg_target_call_clobber_regs;
|
||||
|
||||
|
@ -457,456 +419,6 @@ static const TCGTargetOpDef constraint_sets[] = {
|
|||
|
||||
#include "tcg-target.c.inc"
|
||||
|
||||
/* compare a pointer @ptr and a tb_tc @s */
|
||||
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
|
||||
{
|
||||
if (ptr >= s->ptr + s->size) {
|
||||
return 1;
|
||||
} else if (ptr < s->ptr) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
|
||||
{
|
||||
const struct tb_tc *a = ap;
|
||||
const struct tb_tc *b = bp;
|
||||
|
||||
/*
|
||||
* When both sizes are set, we know this isn't a lookup.
|
||||
* This is the most likely case: every TB must be inserted; lookups
|
||||
* are a lot less frequent.
|
||||
*/
|
||||
if (likely(a->size && b->size)) {
|
||||
if (a->ptr > b->ptr) {
|
||||
return 1;
|
||||
} else if (a->ptr < b->ptr) {
|
||||
return -1;
|
||||
}
|
||||
/* a->ptr == b->ptr should happen only on deletions */
|
||||
g_assert(a->size == b->size);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* In all lookups, one of the two keys has its .size field set to 0.
|
||||
* From the glib sources we see that @ap is always the lookup key. However
|
||||
* the docs provide no guarantee, so we just mark this case as likely.
|
||||
*/
|
||||
if (likely(a->size == 0)) {
|
||||
return ptr_cmp_tb_tc(a->ptr, b);
|
||||
}
|
||||
return ptr_cmp_tb_tc(b->ptr, a);
|
||||
}
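The zero-size lookup key described in the comment above is what lets a single address be matched against the half-open interval [tb->tc.ptr, tb->tc.ptr + tb->tc.size). Here is a toy illustration of the same trick with a plain GTree; struct range and the buffer are invented for the example and are not QEMU's tb_tc.

#include <glib.h>
#include <stdio.h>

struct range { const char *base; size_t size; };

static gint range_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct range *a = ap, *b = bp;

    if (a->size && b->size) {              /* insert/remove: compare start addresses */
        return (a->base > b->base) - (a->base < b->base);
    }
    /* Lookup: whichever key has size 0 is a bare address. */
    const struct range *key = a->size ? b : a;
    const struct range *r   = a->size ? a : b;

    if (key->base < r->base) {
        return key == a ? -1 : 1;
    }
    if (key->base >= r->base + r->size) {
        return key == a ? 1 : -1;
    }
    return 0;                              /* address falls inside the range */
}

int main(void)
{
    static char buf[256];
    GTree *tree = g_tree_new(range_cmp);
    struct range r1 = { buf, 64 }, r2 = { buf + 64, 64 };

    g_tree_insert(tree, &r1, &r1);
    g_tree_insert(tree, &r2, &r2);

    struct range key = { buf + 70, 0 };
    struct range *hit = g_tree_lookup(tree, &key);
    printf("offset of containing range: %td\n", hit ? hit->base - buf : -1);
    return 0;
}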
|
||||
|
||||
static void tcg_region_trees_init(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
|
||||
region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_init(&rt->lock);
|
||||
rt->tree = g_tree_new(tb_tc_cmp);
|
||||
}
|
||||
}
|
||||
|
||||
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
|
||||
{
|
||||
size_t region_idx;
|
||||
|
||||
/*
|
||||
* Like tcg_splitwx_to_rw, with no assert. The pc may come from
|
||||
* a signal handler over which the caller has no control.
|
||||
*/
|
||||
if (!in_code_gen_buffer(p)) {
|
||||
p -= tcg_splitwx_diff;
|
||||
if (!in_code_gen_buffer(p)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (p < region.start_aligned) {
|
||||
region_idx = 0;
|
||||
} else {
|
||||
ptrdiff_t offset = p - region.start_aligned;
|
||||
|
||||
if (offset > region.stride * (region.n - 1)) {
|
||||
region_idx = region.n - 1;
|
||||
} else {
|
||||
region_idx = offset / region.stride;
|
||||
}
|
||||
}
|
||||
return region_trees + region_idx * tree_size;
|
||||
}
|
||||
|
||||
void tcg_tb_insert(TranslationBlock *tb)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
||||
|
||||
g_assert(rt != NULL);
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
g_tree_insert(rt->tree, &tb->tc, tb);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
|
||||
void tcg_tb_remove(TranslationBlock *tb)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
||||
|
||||
g_assert(rt != NULL);
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
g_tree_remove(rt->tree, &tb->tc);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the TB 'tb' such that
|
||||
* tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
|
||||
* Return NULL if not found.
|
||||
*/
|
||||
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
|
||||
TranslationBlock *tb;
|
||||
struct tb_tc s = { .ptr = (void *)tc_ptr };
|
||||
|
||||
if (rt == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
tb = g_tree_lookup(rt->tree, &s);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
return tb;
|
||||
}
|
||||
|
||||
static void tcg_region_tree_lock_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_region_tree_unlock_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
g_tree_foreach(rt->tree, func, user_data);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
}
|
||||
|
||||
size_t tcg_nb_tbs(void)
|
||||
{
|
||||
size_t nb_tbs = 0;
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
nb_tbs += g_tree_nnodes(rt->tree);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
return nb_tbs;
|
||||
}
|
||||
|
||||
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
|
||||
{
|
||||
TranslationBlock *tb = v;
|
||||
|
||||
tb_destroy(tb);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void tcg_region_tree_reset_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
|
||||
/* Increment the refcount first so that destroy acts as a reset */
|
||||
g_tree_ref(rt->tree);
|
||||
g_tree_destroy(rt->tree);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
}
|
||||
|
||||
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
|
||||
{
|
||||
void *start, *end;
|
||||
|
||||
start = region.start_aligned + curr_region * region.stride;
|
||||
end = start + region.size;
|
||||
|
||||
if (curr_region == 0) {
|
||||
start = region.start;
|
||||
}
|
||||
if (curr_region == region.n - 1) {
|
||||
end = region.end;
|
||||
}
|
||||
|
||||
*pstart = start;
|
||||
*pend = end;
|
||||
}
|
||||
|
||||
static void tcg_region_assign(TCGContext *s, size_t curr_region)
|
||||
{
|
||||
void *start, *end;
|
||||
|
||||
tcg_region_bounds(curr_region, &start, &end);
|
||||
|
||||
s->code_gen_buffer = start;
|
||||
s->code_gen_ptr = start;
|
||||
s->code_gen_buffer_size = end - start;
|
||||
s->code_gen_highwater = end - TCG_HIGHWATER;
|
||||
}
|
||||
|
||||
static bool tcg_region_alloc__locked(TCGContext *s)
|
||||
{
|
||||
if (region.current == region.n) {
|
||||
return true;
|
||||
}
|
||||
tcg_region_assign(s, region.current);
|
||||
region.current++;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Request a new region once the one in use has filled up.
|
||||
* Returns true on error.
|
||||
*/
|
||||
static bool tcg_region_alloc(TCGContext *s)
|
||||
{
|
||||
bool err;
|
||||
/* read the region size now; alloc__locked will overwrite it on success */
|
||||
size_t size_full = s->code_gen_buffer_size;
|
||||
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
err = tcg_region_alloc__locked(s);
|
||||
if (!err) {
|
||||
region.agg_size_full += size_full - TCG_HIGHWATER;
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a context's first region allocation.
|
||||
* This function does _not_ increment region.agg_size_full.
|
||||
*/
|
||||
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
|
||||
{
|
||||
return tcg_region_alloc__locked(s);
|
||||
}
|
||||
|
||||
/* Call from a safe-work context */
|
||||
void tcg_region_reset_all(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
||||
unsigned int i;
|
||||
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
region.current = 0;
|
||||
region.agg_size_full = 0;
|
||||
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||
bool err = tcg_region_initial_alloc__locked(s);
|
||||
|
||||
g_assert(!err);
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
|
||||
tcg_region_tree_reset_all();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
static size_t tcg_n_regions(void)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
/*
|
||||
* It is likely that some vCPUs will translate more code than others, so we
|
||||
* first try to set more regions than max_cpus, with those regions being of
|
||||
* reasonable size. If that's not possible we make do by evenly dividing
|
||||
* the code_gen_buffer among the vCPUs.
|
||||
*/
|
||||
static size_t tcg_n_regions(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
/* Use a single region if all we have is one vCPU thread */
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
MachineState *ms = MACHINE(qdev_get_machine());
|
||||
unsigned int max_cpus = ms->smp.max_cpus;
|
||||
#endif
|
||||
if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Try to have more regions than max_cpus, with each region being >= 2 MB */
|
||||
for (i = 8; i > 0; i--) {
|
||||
size_t regions_per_thread = i;
|
||||
size_t region_size;
|
||||
|
||||
region_size = tcg_init_ctx.code_gen_buffer_size;
|
||||
region_size /= max_cpus * regions_per_thread;
|
||||
|
||||
if (region_size >= 2 * 1024u * 1024) {
|
||||
return max_cpus * regions_per_thread;
|
||||
}
|
||||
}
|
||||
/* If we can't, then just allocate one region per vCPU thread */
|
||||
return max_cpus;
|
||||
}
|
||||
#endif
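As a concrete illustration of the heuristic in the (now superseded) tcg_n_regions above: with an 8-vCPU machine and a 256 MiB code_gen_buffer, the very first iteration already yields regions of 4 MiB, so 64 regions are used. The numbers below are invented for the example.

#include <stdio.h>

int main(void)
{
    size_t buf_size = 256u * 1024 * 1024;    /* hypothetical code_gen_buffer size */
    unsigned max_cpus = 8;

    for (unsigned per_thread = 8; per_thread > 0; per_thread--) {
        size_t region_size = buf_size / (max_cpus * per_thread);

        if (region_size >= 2u * 1024 * 1024) {
            printf("%u regions of %zu bytes\n",
                   max_cpus * per_thread, region_size);
            return 0;                        /* prints: 64 regions of 4194304 bytes */
        }
    }
    printf("fallback: %u regions\n", max_cpus);
    return 0;
}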
|
||||
|
||||
/*
|
||||
* Initializes region partitioning.
|
||||
*
|
||||
* Called at init time from the parent thread (i.e. the one calling
|
||||
* tcg_context_init), after the target's TCG globals have been set.
|
||||
*
|
||||
* Region partitioning works by splitting code_gen_buffer into separate regions,
|
||||
* and then assigning regions to TCG threads so that the threads can translate
|
||||
* code in parallel without synchronization.
|
||||
*
|
||||
* In softmmu the number of TCG threads is bounded by max_cpus, so we use at
|
||||
* least max_cpus regions in MTTCG. In !MTTCG we use a single region.
|
||||
* Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
|
||||
* must have been parsed before calling this function, since it calls
|
||||
* qemu_tcg_mttcg_enabled().
|
||||
*
|
||||
* In user-mode we use a single region. Having multiple regions in user-mode
|
||||
* is not supported, because the number of vCPU threads (recall that each thread
|
||||
* spawned by the guest corresponds to a vCPU thread) is only bounded by the
|
||||
* OS, and usually this number is huge (tens of thousands is not uncommon).
|
||||
* Thus, given this large bound on the number of vCPU threads and the fact
|
||||
* that code_gen_buffer is allocated at compile-time, we cannot guarantee
|
||||
* the availability of at least one region per vCPU thread.
|
||||
*
|
||||
* However, this user-mode limitation is unlikely to be a significant problem
|
||||
* in practice. Multi-threaded guests share most if not all of their translated
|
||||
* code, which makes parallel code generation less appealing than in softmmu.
|
||||
*/
|
||||
void tcg_region_init(void)
|
||||
{
|
||||
void *buf = tcg_init_ctx.code_gen_buffer;
|
||||
void *aligned;
|
||||
size_t size = tcg_init_ctx.code_gen_buffer_size;
|
||||
size_t page_size = qemu_real_host_page_size;
|
||||
size_t region_size;
|
||||
size_t n_regions;
|
||||
size_t i;
|
||||
|
||||
n_regions = tcg_n_regions();
|
||||
|
||||
/* The first region will be 'aligned - buf' bytes larger than the others */
|
||||
aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
|
||||
g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
|
||||
/*
|
||||
* Make region_size a multiple of page_size, using aligned as the start.
|
||||
* As a result of this we might end up with a few extra pages at the end of
|
||||
* the buffer; we will assign those to the last region.
|
||||
*/
|
||||
region_size = (size - (aligned - buf)) / n_regions;
|
||||
region_size = QEMU_ALIGN_DOWN(region_size, page_size);
|
||||
|
||||
/* A region must have at least 2 pages; one code, one guard */
|
||||
g_assert(region_size >= 2 * page_size);
|
||||
|
||||
/* init the region struct */
|
||||
qemu_mutex_init(®ion.lock);
|
||||
region.n = n_regions;
|
||||
region.size = region_size - page_size;
|
||||
region.stride = region_size;
|
||||
region.start = buf;
|
||||
region.start_aligned = aligned;
|
||||
/* page-align the end, since its last page will be a guard page */
|
||||
region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
|
||||
/* account for that last guard page */
|
||||
region.end -= page_size;
|
||||
|
||||
/*
|
||||
* Set guard pages in the rw buffer, as that's the one into which
|
||||
* buffer overruns could occur. Do not set guard pages in the rx
|
||||
* buffer -- let that one use hugepages throughout.
|
||||
*/
|
||||
for (i = 0; i < region.n; i++) {
|
||||
void *start, *end;
|
||||
|
||||
tcg_region_bounds(i, &start, &end);
|
||||
|
||||
/*
|
||||
* macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
|
||||
* rejects a permission change from RWX -> NONE. Guard pages are
|
||||
* nice for bug detection but are not essential; ignore any failure.
|
||||
*/
|
||||
(void)qemu_mprotect_none(end, page_size);
|
||||
}
|
||||
|
||||
tcg_region_trees_init();
|
||||
|
||||
/* In user-mode we support only one ctx, so do the initial allocation now */
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
{
|
||||
bool err = tcg_region_initial_alloc__locked(tcg_ctx);
|
||||
|
||||
g_assert(!err);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_TCG
|
||||
const void *tcg_splitwx_to_rx(void *rw)
|
||||
{
|
||||
/* Pass NULL pointers unchanged. */
|
||||
if (rw) {
|
||||
g_assert(in_code_gen_buffer(rw));
|
||||
rw += tcg_splitwx_diff;
|
||||
}
|
||||
return rw;
|
||||
}
|
||||
|
||||
void *tcg_splitwx_to_rw(const void *rx)
|
||||
{
|
||||
/* Pass NULL pointers unchanged. */
|
||||
if (rx) {
|
||||
rx -= tcg_splitwx_diff;
|
||||
/* Assert that we end with a pointer in the rw region. */
|
||||
g_assert(in_code_gen_buffer(rx));
|
||||
}
|
||||
return (void *)rx;
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_TCG */
|
||||
|
||||
static void alloc_tcg_plugin_context(TCGContext *s)
|
||||
{
|
||||
#ifdef CONFIG_PLUGIN
|
||||
|
@ -939,10 +451,8 @@ void tcg_register_thread(void)
|
|||
#else
|
||||
void tcg_register_thread(void)
|
||||
{
|
||||
MachineState *ms = MACHINE(qdev_get_machine());
|
||||
TCGContext *s = g_malloc(sizeof(*s));
|
||||
unsigned int i, n;
|
||||
bool err;
|
||||
|
||||
*s = tcg_init_ctx;
|
||||
|
||||
|
@ -956,79 +466,19 @@ void tcg_register_thread(void)
|
|||
}
|
||||
|
||||
/* Claim an entry in tcg_ctxs */
|
||||
n = qatomic_fetch_inc(&n_tcg_ctxs);
|
||||
g_assert(n < ms->smp.max_cpus);
|
||||
n = qatomic_fetch_inc(&tcg_cur_ctxs);
|
||||
g_assert(n < tcg_max_ctxs);
|
||||
qatomic_set(&tcg_ctxs[n], s);
|
||||
|
||||
if (n > 0) {
|
||||
alloc_tcg_plugin_context(s);
|
||||
tcg_region_initial_alloc(s);
|
||||
}
|
||||
|
||||
tcg_ctx = s;
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
err = tcg_region_initial_alloc__locked(tcg_ctx);
|
||||
g_assert(!err);
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
}
|
||||
#endif /* !CONFIG_USER_ONLY */
|
||||
|
||||
/*
|
||||
* Returns the size (in bytes) of all translated code (i.e. from all regions)
|
||||
* currently in the cache.
|
||||
* See also: tcg_code_capacity()
|
||||
* Do not confuse with tcg_current_code_size(); that one applies to a single
|
||||
* TCG context.
|
||||
*/
|
||||
size_t tcg_code_size(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
||||
unsigned int i;
|
||||
size_t total;
|
||||
|
||||
qemu_mutex_lock(®ion.lock);
|
||||
total = region.agg_size_full;
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||
size_t size;
|
||||
|
||||
size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
|
||||
g_assert(size <= s->code_gen_buffer_size);
|
||||
total += size;
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
return total;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the code capacity (in bytes) of the entire cache, i.e. including all
|
||||
* regions.
|
||||
* See also: tcg_code_size()
|
||||
*/
|
||||
size_t tcg_code_capacity(void)
|
||||
{
|
||||
size_t guard_size, capacity;
|
||||
|
||||
/* no need for synchronization; these variables are set at init time */
|
||||
guard_size = region.stride - region.size;
|
||||
capacity = region.end + guard_size - region.start;
|
||||
capacity -= region.n * (guard_size + TCG_HIGHWATER);
|
||||
return capacity;
|
||||
}
|
||||
|
||||
size_t tcg_tb_phys_invalidate_count(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
||||
unsigned int i;
|
||||
size_t total = 0;
|
||||
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
|
||||
|
||||
total += qatomic_read(&s->tb_phys_invalidate_count);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
/* pool based memory allocation */
|
||||
void *tcg_malloc_internal(TCGContext *s, int size)
|
||||
{
|
||||
|
@ -1101,8 +551,9 @@ static void process_op_defs(TCGContext *s);
|
|||
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
|
||||
TCGReg reg, const char *name);
|
||||
|
||||
void tcg_context_init(TCGContext *s)
|
||||
static void tcg_context_init(unsigned max_cpus)
|
||||
{
|
||||
TCGContext *s = &tcg_init_ctx;
|
||||
int op, total_args, n, i;
|
||||
TCGOpDef *def;
|
||||
TCGArgConstraint *args_ct;
|
||||
|
@ -1167,11 +618,11 @@ void tcg_context_init(TCGContext *s)
|
|||
*/
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
tcg_ctxs = &tcg_ctx;
|
||||
n_tcg_ctxs = 1;
|
||||
tcg_cur_ctxs = 1;
|
||||
tcg_max_ctxs = 1;
|
||||
#else
|
||||
MachineState *ms = MACHINE(qdev_get_machine());
|
||||
unsigned int max_cpus = ms->smp.max_cpus;
|
||||
tcg_ctxs = g_new(TCGContext *, max_cpus);
|
||||
tcg_max_ctxs = max_cpus;
|
||||
tcg_ctxs = g_new0(TCGContext *, max_cpus);
|
||||
#endif
|
||||
|
||||
tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
|
||||
|
@ -1179,6 +630,12 @@ void tcg_context_init(TCGContext *s)
|
|||
cpu_env = temp_tcgv_ptr(ts);
|
||||
}
|
||||
|
||||
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
|
||||
{
|
||||
tcg_context_init(max_cpus);
|
||||
tcg_region_init(tb_size, splitwx, max_cpus);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate TBs right before their corresponding translated code, making
|
||||
* sure that TBs and code are on different cache lines.
|
||||
|
@ -1206,32 +663,16 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s)
|
|||
|
||||
void tcg_prologue_init(TCGContext *s)
|
||||
{
|
||||
size_t prologue_size, total_size;
|
||||
void *buf0, *buf1;
|
||||
size_t prologue_size;
|
||||
|
||||
/* Put the prologue at the beginning of code_gen_buffer. */
|
||||
buf0 = s->code_gen_buffer;
|
||||
total_size = s->code_gen_buffer_size;
|
||||
s->code_ptr = buf0;
|
||||
s->code_buf = buf0;
|
||||
s->code_ptr = s->code_gen_ptr;
|
||||
s->code_buf = s->code_gen_ptr;
|
||||
s->data_gen_ptr = NULL;
|
||||
|
||||
/*
|
||||
* The region trees are not yet configured, but tcg_splitwx_to_rx
|
||||
* needs the bounds for an assert.
|
||||
*/
|
||||
region.start = buf0;
|
||||
region.end = buf0 + total_size;
|
||||
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
|
||||
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
|
||||
#endif
|
||||
|
||||
/* Compute a high-water mark, at which we voluntarily flush the buffer
|
||||
and start over. The size here is arbitrary, significantly larger
|
||||
than we expect the code generation for any one opcode to require. */
|
||||
s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
|
||||
|
||||
#ifdef TCG_TARGET_NEED_POOL_LABELS
|
||||
s->pool_labels = NULL;
|
||||
#endif
|
||||
|
@ -1248,32 +689,25 @@ void tcg_prologue_init(TCGContext *s)
|
|||
}
|
||||
#endif
|
||||
|
||||
buf1 = s->code_ptr;
|
||||
prologue_size = tcg_current_code_size(s);
|
||||
|
||||
#ifndef CONFIG_TCG_INTERPRETER
|
||||
flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
|
||||
tcg_ptr_byte_diff(buf1, buf0));
|
||||
flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
|
||||
(uintptr_t)s->code_buf, prologue_size);
|
||||
#endif
|
||||
|
||||
/* Deduct the prologue from the buffer. */
|
||||
prologue_size = tcg_current_code_size(s);
|
||||
s->code_gen_ptr = buf1;
|
||||
s->code_gen_buffer = buf1;
|
||||
s->code_buf = buf1;
|
||||
total_size -= prologue_size;
|
||||
s->code_gen_buffer_size = total_size;
|
||||
|
||||
tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
|
||||
tcg_region_prologue_set(s);
|
||||
|
||||
#ifdef DEBUG_DISAS
|
||||
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
|
||||
FILE *logfile = qemu_log_lock();
|
||||
qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
|
||||
if (s->data_gen_ptr) {
|
||||
size_t code_size = s->data_gen_ptr - buf0;
|
||||
size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
|
||||
size_t data_size = prologue_size - code_size;
|
||||
size_t i;
|
||||
|
||||
log_disas(buf0, code_size);
|
||||
log_disas(s->code_gen_ptr, code_size);
|
||||
|
||||
for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
|
||||
if (sizeof(tcg_target_ulong) == 8) {
|
||||
|
@ -1287,7 +721,7 @@ void tcg_prologue_init(TCGContext *s)
|
|||
}
|
||||
}
|
||||
} else {
|
||||
log_disas(buf0, prologue_size);
|
||||
log_disas(s->code_gen_ptr, prologue_size);
|
||||
}
|
||||
qemu_log("\n");
|
||||
qemu_log_flush();
|
||||
|
@ -2649,6 +2083,19 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
|
|||
#endif
|
||||
}
|
||||
|
||||
void tcg_remove_ops_after(TCGOp *op)
|
||||
{
|
||||
TCGContext *s = tcg_ctx;
|
||||
|
||||
while (true) {
|
||||
TCGOp *last = tcg_last_op();
|
||||
if (last == op) {
|
||||
return;
|
||||
}
|
||||
tcg_op_remove(s, last);
|
||||
}
|
||||
}
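tcg_remove_ops_after gives translators a way to discard opcodes they emitted speculatively. A hedged usage sketch follows; gen_speculative_ops and speculation_worthwhile are hypothetical helpers standing in for whatever a real translator would do, DisasContext is just a placeholder for the translator's own state, and the fragment assumes it runs inside a TCG translation context where tcg_last_op() is valid.

/* Sketch only -- not a complete translator. */
static void try_speculative_sequence(DisasContext *ctx)
{
    TCGOp *checkpoint = tcg_last_op();   /* remember the current tail of the op list */

    gen_speculative_ops(ctx);            /* hypothetical: emit a tentative sequence */

    if (!speculation_worthwhile(ctx)) {  /* hypothetical: decide whether to keep it */
        /* Drop every op emitted after the checkpoint, as if nothing happened. */
        tcg_remove_ops_after(checkpoint);
    }
}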
|
||||
|
||||
static TCGOp *tcg_op_alloc(TCGOpcode opc)
|
||||
{
|
||||
TCGContext *s = tcg_ctx;
|
||||
|
@ -4480,7 +3927,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
|
|||
static inline
|
||||
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
||||
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
|
@ -4543,7 +3990,7 @@ void tcg_dump_op_count(void)
|
|||
|
||||
int64_t tcg_cpu_exec_time(void)
|
||||
{
|
||||
unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
|
||||
unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
|
||||
unsigned int i;
|
||||
int64_t ret = 0;
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#define TCG_TARGET_INTERPRETER 1
|
||||
#define TCG_TARGET_INSN_UNIT_SIZE 1
|
||||
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
|
||||
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
|
||||
|
||||
#if UINTPTR_MAX == UINT32_MAX
|
||||
# define TCG_TARGET_REG_BITS 32
|
||||
|
|
|
@ -97,6 +97,15 @@ static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
|
|||
#endif
|
||||
}
|
||||
|
||||
int qemu_mprotect_rw(void *addr, size_t size)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return qemu_mprotect__osdep(addr, size, PAGE_READWRITE);
|
||||
#else
|
||||
return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE);
|
||||
#endif
|
||||
}
|
||||
|
||||
int qemu_mprotect_rwx(void *addr, size_t size)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
|