Merge tag 'pull-tcg-20230530' of https://gitlab.com/rth7680/qemu into staging

Improvements to 128-bit atomics:
  - Separate __int128_t type and arithmetic detection
  - Support 128-bit load/store in backend for i386, aarch64, ppc64, s390x
  - Accelerate atomics via host/include/
Decodetree:
  - Add named field syntax
  - Move tests to meson

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmR2R10dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/bsgf/XLi8q+ITyoEAKwG4
# 6ML7DktLAdIs9Euah9twqe16U0BM0YzpKfymBfVVBKKaIa0524N4ZKIT3h6EeJo+
# f+ultqrpsnH+aQh4wc3ZCkEvRdhzhFT8VcoRTunJuJrbL3Y8n2ZSgODUL2a0tahT
# Nn+zEPm8rzQanSKQHq5kyNBLpgTUKjc5wKfvy/WwttnFmkTnqzcuEA6nPVOVwOHC
# lZBQCByIQWsHfFHUVJFvsFzBQbm0mAiW6FNKzPBkoXon0h/UZUI1lV+xXzgutFs+
# zR2O8IZwLYRu2wOWiTF8Nn2qQafkB3Dhwoq3JTEXhOqosOPExbIiWlsZDlPiKRJk
# bwmQlg==
# =XQMb
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 30 May 2023 11:58:37 AM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230530' of https://gitlab.com/rth7680/qemu: (27 commits)
  tests/decode: Add tests for various named-field cases
  scripts/decodetree: Implement named field support
  scripts/decodetree: Implement a topological sort
  scripts/decodetree: Pass lvalue-formatter function to str_extract()
  docs: Document decodetree named field syntax
  tests/decode: Convert tests to meson
  decodetree: Do not remove output_file from /dev
  decodetree: Diagnose empty pattern group
  decodetree: Fix recursion in prop_format and build_tree
  decodetree: Add --test-for-error
  tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS
  accel/tcg: Add aarch64 store_atom_insert_al16
  accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8
  accel/tcg: Add x86_64 load_atom_extract_al16_or_al8
  accel/tcg: Extract store_atom_insert_al16 to host header
  accel/tcg: Extract load_atom_extract_al16_or_al8 to host header
  tcg/s390x: Support 128-bit load/store
  tcg/ppc: Support 128-bit load/store
  tcg/aarch64: Support 128-bit load/store
  tcg/aarch64: Simplify constraints on qemu_ld/st
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Author: Richard Henderson
Date:   2023-05-30 13:25:18 -07:00
Commit: 51bdb0b57a
38 changed files with 1312 additions and 225 deletions

View file

@ -9,6 +9,9 @@
* See the COPYING file in the top-level directory.
*/
#include "host/load-extract-al16-al8.h"
#include "host/store-insert-al16.h"
#ifdef CONFIG_ATOMIC64
# define HAVE_al8 true
#else
@ -156,7 +159,7 @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*/
if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
uint64_t *p = __builtin_assume_aligned(pv, 8);
return *p;
}
@ -191,7 +194,7 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*/
if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
return *p;
}
#endif
@ -311,40 +314,6 @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
return int128_getlo(r);
}
/**
* load_atom_extract_al16_or_al8:
* @p: host address
* @s: object size in bytes, @s <= 8.
*
* Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
* cross a 16-byte boundary then the access must be 16-byte atomic,
* otherwise the access must be 8-byte atomic.
*/
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
load_atom_extract_al16_or_al8(void *pv, int s)
{
uintptr_t pi = (uintptr_t)pv;
int o = pi & 7;
int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
Int128 r;
pv = (void *)(pi & ~7);
if (pi & 8) {
uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
uint64_t a = qatomic_read__nocheck(p8);
uint64_t b = qatomic_read__nocheck(p8 + 1);
if (HOST_BIG_ENDIAN) {
r = int128_make128(b, a);
} else {
r = int128_make128(a, b);
}
} else {
r = atomic16_read_ro(pv);
}
return int128_getlo(int128_urshift(r, shr));
}
/**
* load_atom_4_by_2:
* @pv: host address
@ -713,45 +682,6 @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
__ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
/**
* store_atom_insert_al16:
* @p: host address
* @val: shifted value to store
* @msk: mask for value to store
*
* Atomically store @val to @p masked by @msk.
*/
static void ATTRIBUTE_ATOMIC128_OPT
store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
{
#if defined(CONFIG_ATOMIC128)
__uint128_t *pu, old, new;
/* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
pu = __builtin_assume_aligned(ps, 16);
old = *pu;
do {
new = (old & ~msk.u) | val.u;
} while (!__atomic_compare_exchange_n(pu, &old, new, true,
__ATOMIC_RELAXED, __ATOMIC_RELAXED));
#elif defined(CONFIG_CMPXCHG128)
__uint128_t *pu, old, new;
/*
* Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
* defer to libatomic, so we must use __sync_*_compare_and_swap_16
* and accept the sequential consistency that comes with it.
*/
pu = __builtin_assume_aligned(ps, 16);
do {
old = *pu;
new = (old & ~msk.u) | val.u;
} while (!__sync_bool_compare_and_swap_16(pu, old, new));
#else
qemu_build_not_reached();
#endif
}
/**
* store_bytes_leN:
* @pv: host address

View file

@ -23,22 +23,42 @@ Fields
Syntax::
field_def := '%' identifier ( unnamed_field )* ( !function=identifier )?
field_def := '%' identifier ( field )* ( !function=identifier )?
field := unnamed_field | named_field
unnamed_field := number ':' ( 's' ) number
named_field := identifier ':' ( 's' ) number
For *unnamed_field*, the first number is the least-significant bit position
of the field and the second number is the length of the field. If the 's' is
present, the field is considered signed. If multiple ``unnamed_fields`` are
present, they are concatenated. In this way one can define disjoint fields.
present, the field is considered signed.
A *named_field* refers to some other field in the instruction pattern
or format. Regardless of the length of the other field where it is
defined, it will be inserted into this field with the specified
signedness and bit width.
Field definitions that involve loops (i.e. where a field is defined
directly or indirectly in terms of itself) are errors.
A format can include fields that refer to named fields that are
defined in the instruction pattern(s) that use the format.
Conversely, an instruction pattern can include fields that refer to
named fields that are defined in the format it uses. However you
cannot currently do both at once (i.e. pattern P uses format F; F has
a field A that refers to a named field B that is defined in P, and P
has a field C that refers to a named field D that is defined in F).
If multiple ``fields`` are present, they are concatenated.
In this way one can define disjoint fields.
If ``!function`` is specified, the concatenated result is passed through the
named function, taking and returning an integral value.
One may use ``!function`` with zero ``unnamed_fields``. This case is called
One may use ``!function`` with zero ``fields``. This case is called
a *parameter*, and the named function is only passed the ``DisasContext``
and returns an integral value extracted from there.
A field with no ``unnamed_fields`` and no ``!function`` is in error.
A field with no ``fields`` and no ``!function`` is in error.
Field examples:
@ -56,6 +76,9 @@ Field examples:
| %shimm8 5:s8 13:1 | expand_shimm8(sextract(i, 5, 8) << 1 | |
| !function=expand_shimm8 | extract(i, 13, 1)) |
+---------------------------+---------------------------------------------+
| %sz_imm 10:2 sz:3 | expand_sz_imm(extract(i, 10, 2) << 3 | |
| !function=expand_sz_imm | extract(a->sz, 0, 3)) |
+---------------------------+---------------------------------------------+
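As a rough illustration of what the generated extractor for the new ``%sz_imm`` example looks like, here is a hedged, self-contained C sketch. The struct, function, and variable names below are invented for the demo, and the real decodetree output nests deposit32() calls and writes into the pattern's argument-set struct rather than using a plain OR, but the arithmetic matches the table entry above.

/* Hedged sketch of the code decodetree emits for
 *   %sz_imm 10:2 sz:3 !function=expand_sz_imm
 * All names here (arg_demo, expand_sz_imm, ...) are invented for the demo. */
#include <stdint.h>
#include <stdio.h>

typedef struct { int dummy; } DisasContext;            /* stand-in */
typedef struct { int sz; int sz_imm; } arg_demo;       /* hypothetical arg set */

static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

static int expand_sz_imm(DisasContext *ctx, int x)     /* hypothetical !function */
{
    return x;                                          /* identity, for the demo */
}

int main(void)
{
    DisasContext ctx;
    arg_demo a = { .sz = 5 };           /* named field 'sz', already decoded */
    uint32_t insn = 3 << 10;            /* insn[11:10] = 0b11 */

    /* %sz_imm concatenates insn[11:10] above the 3-bit named field 'sz',
     * then passes the result (and ctx) to the !function. */
    a.sz_imm = expand_sz_imm(&ctx,
                             extract32(insn, 10, 2) << 3 | extract32(a.sz, 0, 3));
    printf("sz_imm = %d\n", a.sz_imm);  /* 3 << 3 | 5 = 29 */
    return 0;
}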
Argument Sets
=============

View file

@ -0,0 +1,40 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Atomic extract 64 from 128-bit, AArch64 version.
*
* Copyright (C) 2023 Linaro, Ltd.
*/
#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
#include "host/cpuinfo.h"
#include "tcg/debug-assert.h"
/**
* load_atom_extract_al16_or_al8:
* @pv: host address
* @s: object size in bytes, @s <= 8.
*
* Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
* cross a 16-byte boundary then the access must be 16-byte atomic,
* otherwise the access must be 8-byte atomic.
*/
static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
{
uintptr_t pi = (uintptr_t)pv;
__int128_t *ptr_align = (__int128_t *)(pi & ~7);
int shr = (pi & 7) * 8;
uint64_t l, h;
/*
* With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
* and single-copy atomic on the parts if 8-byte aligned.
* All we need do is align the pointer mod 8.
*/
tcg_debug_assert(HAVE_ATOMIC128_RO);
asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
return (l >> shr) | (h << (-shr & 63));
}
#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */

View file

@ -0,0 +1,47 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Atomic store insert into 128-bit, AArch64 version.
*
* Copyright (C) 2023 Linaro, Ltd.
*/
#ifndef AARCH64_STORE_INSERT_AL16_H
#define AARCH64_STORE_INSERT_AL16_H
/**
* store_atom_insert_al16:
* @p: host address
* @val: shifted value to store
* @msk: mask for value to store
*
* Atomically store @val to @p masked by @msk.
*/
static inline void ATTRIBUTE_ATOMIC128_OPT
store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
{
/*
* GCC only implements __sync* primitives for int128 on aarch64.
* We can do better without the barriers, and integrating the
* arithmetic into the load-exclusive/store-conditional pair.
*/
uint64_t tl, th, vl, vh, ml, mh;
uint32_t fail;
qemu_build_assert(!HOST_BIG_ENDIAN);
vl = int128_getlo(val);
vh = int128_gethi(val);
ml = int128_getlo(msk);
mh = int128_gethi(msk);
asm("0: ldxp %[l], %[h], %[mem]\n\t"
"bic %[l], %[l], %[ml]\n\t"
"bic %[h], %[h], %[mh]\n\t"
"orr %[l], %[l], %[vl]\n\t"
"orr %[h], %[h], %[vh]\n\t"
"stxp %w[f], %[l], %[h], %[mem]\n\t"
"cbnz %w[f], 0b\n"
: [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
: [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
}
#endif /* AARCH64_STORE_INSERT_AL16_H */
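To make the val/msk convention concrete, here is a hedged, standalone C demo (64-bit GCC/Clang only, since it uses unsigned __int128; insert_demo is an invented name). It performs the same (*p & ~msk) | val update non-atomically that the LDXP/STXP loop above performs atomically, with the caller pre-shifting both the value and the mask to the target byte offset.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hedged demo of the val/msk convention store_atom_insert_al16() expects:
 * the caller pre-shifts both the value and the mask to the byte offset
 * inside the 16-byte slot, and the helper performs *p = (*p & ~msk) | val
 * atomically.  Done non-atomically here with unsigned __int128 purely to
 * show the arithmetic; insert_demo() is an invented name. */
static void insert_demo(unsigned __int128 *p, uint64_t val, uint64_t fieldmask,
                        int byte_ofs)
{
    unsigned __int128 v = (unsigned __int128)val << (byte_ofs * 8);
    unsigned __int128 m = (unsigned __int128)fieldmask << (byte_ofs * 8);

    *p = (*p & ~m) | v;   /* what the LDXP/STXP loop above does atomically */
}

int main(void)
{
    unsigned __int128 slot;
    uint8_t bytes[16];

    memset(&slot, 0xff, sizeof(slot));
    insert_demo(&slot, 0x1234, 0xffff, 7);   /* insert 2 bytes at byte offset 7 */

    memcpy(bytes, &slot, sizeof(bytes));
    for (int i = 0; i < 16; i++) {
        printf("%02x ", bytes[i]);           /* bytes 7 and 8 replaced on LE hosts */
    }
    printf("\n");
    return 0;
}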

View file

@ -0,0 +1,45 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Atomic extract 64 from 128-bit, generic version.
*
* Copyright (C) 2023 Linaro, Ltd.
*/
#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
#define HOST_LOAD_EXTRACT_AL16_AL8_H
/**
* load_atom_extract_al16_or_al8:
* @pv: host address
* @s: object size in bytes, @s <= 8.
*
* Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
* cross a 16-byte boundary then the access must be 16-byte atomic,
* otherwise the access must be 8-byte atomic.
*/
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
load_atom_extract_al16_or_al8(void *pv, int s)
{
uintptr_t pi = (uintptr_t)pv;
int o = pi & 7;
int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
Int128 r;
pv = (void *)(pi & ~7);
if (pi & 8) {
uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
uint64_t a = qatomic_read__nocheck(p8);
uint64_t b = qatomic_read__nocheck(p8 + 1);
if (HOST_BIG_ENDIAN) {
r = int128_make128(b, a);
} else {
r = int128_make128(a, b);
}
} else {
r = atomic16_read_ro(pv);
}
return int128_getlo(int128_urshift(r, shr));
}
#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
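The shift arithmetic above is easiest to see with concrete numbers. Below is a hedged, little-endian-only C demo (all names invented) that loads the two aligned 8-byte halves with plain memcpy instead of atomic reads and recovers a misaligned 4-byte value exactly as the helper does; on a big-endian host the helper would instead use shr = (16 - s - o) * 8.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hedged, little-endian-only demo of the shift selection above, with plain
 * memcpy loads standing in for the two atomic 8-byte reads.  All names are
 * invented; the real helper requires the 16-byte block to be aligned. */
int main(void)
{
    uint8_t buf[24];
    uint64_t lo, hi, expect = 0, got;
    int o = 5, s = 4;                     /* load 4 bytes at offset 5 */

    for (int i = 0; i < 24; i++) {
        buf[i] = i;                       /* recognisable byte pattern */
    }

    memcpy(&lo, buf, 8);                  /* low aligned half */
    memcpy(&hi, buf + 8, 8);              /* high aligned half */
    memcpy(&expect, buf + o, s);          /* the misaligned value we want */

    int shr = o * 8;                      /* little endian: discard o low bytes */
    got = (lo >> shr) | (hi << (-shr & 63));
    got &= ~0ull >> ((8 - s) * 8);        /* keep only s bytes */

    printf("expect=%016llx got=%016llx\n",
           (unsigned long long)expect, (unsigned long long)got);
    return 0;
}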

View file

@ -0,0 +1,50 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Atomic store insert into 128-bit, generic version.
*
* Copyright (C) 2023 Linaro, Ltd.
*/
#ifndef HOST_STORE_INSERT_AL16_H
#define HOST_STORE_INSERT_AL16_H
/**
* store_atom_insert_al16:
* @p: host address
* @val: shifted value to store
* @msk: mask for value to store
*
* Atomically store @val to @p masked by @msk.
*/
static inline void ATTRIBUTE_ATOMIC128_OPT
store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
{
#if defined(CONFIG_ATOMIC128)
__uint128_t *pu;
Int128Alias old, new;
/* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
pu = __builtin_assume_aligned(ps, 16);
old.u = *pu;
msk = int128_not(msk);
do {
new.s = int128_and(old.s, msk);
new.s = int128_or(new.s, val);
} while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
__ATOMIC_RELAXED, __ATOMIC_RELAXED));
#else
Int128 old, new, cmp;
ps = __builtin_assume_aligned(ps, 16);
old = *ps;
msk = int128_not(msk);
do {
cmp = old;
new = int128_and(old, msk);
new = int128_or(new, val);
old = atomic16_cmpxchg(ps, cmp, new);
} while (int128_ne(cmp, old));
#endif
}
#endif /* HOST_STORE_INSERT_AL16_H */

View file

@ -0,0 +1,68 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Load/store for 128-bit atomic operations, x86_64 version.
*
* Copyright (C) 2023 Linaro, Ltd.
*
* See docs/devel/atomics.rst for discussion about the guarantees each
* atomic primitive is meant to provide.
*/
#ifndef X86_64_ATOMIC128_LDST_H
#define X86_64_ATOMIC128_LDST_H
#ifdef CONFIG_INT128_TYPE
#include "host/cpuinfo.h"
#include "tcg/debug-assert.h"
/*
* Through clang 16, with -mcx16, __atomic_load_n is incorrectly
* expanded to a read-write operation: lock cmpxchg16b.
*/
#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
#define HAVE_ATOMIC128_RW 1
static inline Int128 atomic16_read_ro(const Int128 *ptr)
{
Int128Alias r;
tcg_debug_assert(HAVE_ATOMIC128_RO);
asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr));
return r.s;
}
static inline Int128 atomic16_read_rw(Int128 *ptr)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias r;
if (HAVE_ATOMIC128_RO) {
asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
} else {
r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
}
return r.s;
}
static inline void atomic16_set(Int128 *ptr, Int128 val)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias new = { .s = val };
if (HAVE_ATOMIC128_RO) {
asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i));
} else {
__int128_t old;
do {
old = *ptr_align;
} while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
}
}
#else
/* Provide QEMU_ERROR stubs. */
#include "host/include/generic/host/atomic128-ldst.h"
#endif
#endif /* X86_64_ATOMIC128_LDST_H */
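A hedged, standalone x86-64 sketch of why the header provides two readers follows (not QEMU code; compile with -mavx -mcx16, function names invented). VMOVDQA never stores, so it is usable on memory the host may have mapped read-only, while a CMPXCHG16B-based read always performs a locked write and would fault there, which is also why the clang expansion mentioned above is problematic.

#include <stdint.h>
#include <stdio.h>

/* Hedged, x86-64-only sketch (not QEMU code; compile with -mavx -mcx16,
 * function names invented).  vmovdqa_read() never stores, so it is safe
 * on read-only mappings; cas_read() uses CMPXCHG16B, which performs a
 * locked write even when the comparison fails. */
static __uint128_t vmovdqa_read(const __uint128_t *p)
{
    __uint128_t r;
    asm("vmovdqa %1, %0" : "=x"(r) : "m"(*p));   /* 16-byte atomic, read-only */
    return r;
}

static __uint128_t cas_read(__uint128_t *p)
{
    return __sync_val_compare_and_swap_16(p, 0, 0);   /* read-write */
}

int main(void)
{
    _Alignas(16) __uint128_t x = ((__uint128_t)0x0123456789abcdefULL << 64) | 1;

    printf("%d %d\n", vmovdqa_read(&x) == x, cas_read(&x) == x);   /* 1 1 */
    return 0;
}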

View file

@ -0,0 +1,50 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Atomic extract 64 from 128-bit, x86_64 version.
*
* Copyright (C) 2023 Linaro, Ltd.
*/
#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
#define X86_64_LOAD_EXTRACT_AL16_AL8_H
#ifdef CONFIG_INT128_TYPE
#include "host/cpuinfo.h"
/**
* load_atom_extract_al16_or_al8:
* @pv: host address
* @s: object size in bytes, @s <= 8.
*
* Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
* cross a 16-byte boundary then the access must be 16-byte atomic,
* otherwise the access must be 8-byte atomic.
*/
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
load_atom_extract_al16_or_al8(void *pv, int s)
{
uintptr_t pi = (uintptr_t)pv;
__int128_t *ptr_align = (__int128_t *)(pi & ~7);
int shr = (pi & 7) * 8;
Int128Alias r;
/*
* ptr_align % 16 is now only 0 or 8.
* If the host supports atomic loads with VMOVDQU, then always use that,
* making the branch highly predictable. Otherwise we must use VMOVDQA
* when ptr_align % 16 == 0 for 16-byte atomicity.
*/
if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align));
} else {
asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
}
return int128_getlo(int128_urshift(r.s, shr));
}
#else
/* Fallback definition that must be optimized away, or error. */
uint64_t QEMU_ERROR("unsupported atomic")
load_atom_extract_al16_or_al8(void *pv, int s);
#endif
#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */

View file

@ -481,7 +481,7 @@ static inline void bswap128s(Int128 *s)
* a possible structure and the native types. Ease parameter passing
* via use of the transparent union extension.
*/
#ifdef CONFIG_INT128
#ifdef CONFIG_INT128_TYPE
typedef union {
__uint128_t u;
__int128_t i;
@ -489,6 +489,6 @@ typedef union {
} Int128Alias __attribute__((transparent_union));
#else
typedef Int128 Int128Alias;
#endif /* CONFIG_INT128 */
#endif /* CONFIG_INT128_TYPE */
#endif /* INT128_H */
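For readers unfamiliar with the transparent-union trick referenced in the comment, here is a hedged, standalone sketch (GCC/Clang extension; the union and function names are invented, and the Int128 struct member is omitted so the demo compiles on its own). The alias itself only needs the 128-bit type to exist, which is why it is now gated on CONFIG_INT128_TYPE rather than full CONFIG_INT128 arithmetic detection.

#include <stdint.h>

typedef union {
    __uint128_t u;
    __int128_t i;
    /* Int128 s; -- the QEMU struct form, omitted in this standalone demo */
} Int128AliasDemo __attribute__((transparent_union));

/* A callee that wants the native 128-bit representation. */
static uint64_t low_half(Int128AliasDemo x)
{
    return (uint64_t)x.u;
}

int main(void)
{
    __uint128_t a = ((__uint128_t)7 << 64) | 42;

    /* The transparent union lets a plain __uint128_t (or, in QEMU, an
     * Int128 struct) be passed directly, with no cast at the call site. */
    return low_half(a) == 42 ? 0 : 1;
}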

View file

@ -2543,7 +2543,13 @@ config_host_data.set('CONFIG_ATOMIC64', cc.links('''
return 0;
}'''))
has_int128 = cc.links('''
has_int128_type = cc.compiles('''
__int128_t a;
__uint128_t b;
int main(void) { b = a; }''')
config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
has_int128 = has_int128_type and cc.links('''
__int128_t a;
__uint128_t b;
int main (void) {
@ -2552,10 +2558,9 @@ has_int128 = cc.links('''
a = a * a;
return 0;
}''')
config_host_data.set('CONFIG_INT128', has_int128)
if has_int128
if has_int128_type
# "do we have 128-bit atomics which are handled inline and specifically not
# via libatomic". The reason we can't use libatomic is documented in the
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
@ -2564,7 +2569,7 @@ if has_int128
# __alignof(unsigned __int128) for the host.
atomic_test_128 = '''
int main(int ac, char **av) {
unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
__uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
@ -2586,7 +2591,7 @@ if has_int128
config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
int main(void)
{
unsigned __int128 x = 0, y = 0;
__uint128_t x = 0, y = 0;
__sync_val_compare_and_swap_16(&x, y, x);
return 0;
}

View file

@ -35,6 +35,7 @@
formats = {}
allpatterns = []
anyextern = False
testforerror = False
translate_prefix = 'trans'
translate_scope = 'static '
@ -53,6 +54,80 @@
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
# Local implementation of a topological sort. We use the same API that
# the Python graphlib does, so that when QEMU moves forward to a
# baseline of Python 3.9 or newer this code can all be dropped and
# replaced with:
# from graphlib import TopologicalSorter, CycleError
#
# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
#
# We only implement the parts of TopologicalSorter we care about:
# ts = TopologicalSorter(graph=None)
# create the sorter. graph is a dictionary whose keys are
# nodes and whose values are lists of the predecessors of that node.
# (That is, if graph contains "A" -> ["B", "C"] then we must output
# B and C before A.)
# ts.static_order()
# returns a list of all the nodes in sorted order, or raises CycleError
# CycleError
# exception raised if there are cycles in the graph. The second
# element in the args attribute is a list of nodes which form a
# cycle; the first and last element are the same, eg [a, b, c, a]
# (Our implementation doesn't give the order correctly.)
#
# For our purposes we can assume that the data set is always small
# (typically 10 nodes or less, actual links in the graph very rare),
# so we don't need to worry about efficiency of implementation.
#
# The core of this implementation is from
# https://code.activestate.com/recipes/578272-topological-sort/
# (but updated to Python 3), and is under the MIT license.
class CycleError(ValueError):
"""Subclass of ValueError raised if cycles exist in the graph"""
pass
class TopologicalSorter:
"""Topologically sort a graph"""
def __init__(self, graph=None):
self.graph = graph
def static_order(self):
# We do the sort right here, unlike the stdlib version
from functools import reduce
data = {}
r = []
if not self.graph:
return []
# This code wants the values in the dict to be specifically sets
for k, v in self.graph.items():
data[k] = set(v)
# Find all items that don't depend on anything.
extra_items_in_deps = (reduce(set.union, data.values())
- set(data.keys()))
# Add empty dependencies where needed
data.update({item:{} for item in extra_items_in_deps})
while True:
ordered = set(item for item, dep in data.items() if not dep)
if not ordered:
break
r.extend(ordered)
data = {item: (dep - ordered)
for item, dep in data.items()
if item not in ordered}
if data:
# This doesn't give as nice results as the stdlib, which
# gives you the cycle by listing the nodes in order. Here
# we only know the nodes in the cycle but not their order.
raise CycleError(f'nodes are in a cycle', list(data.keys()))
return r
# end TopologicalSorter
def error_with_file(file, lineno, *args):
"""Print an error message from file:line and args and exit."""
global output_file
@ -70,8 +145,13 @@ def error_with_file(file, lineno, *args):
if output_file and output_fd:
output_fd.close()
os.remove(output_file)
exit(1)
# Do not try to remove e.g. -o /dev/null
if not output_file.startswith("/dev"):
try:
os.remove(output_file)
except PermissionError:
pass
exit(0 if testforerror else 1)
# end error_with_file
@ -205,11 +285,14 @@ def __str__(self):
s = ''
return str(self.pos) + ':' + s + str(self.len)
def str_extract(self):
def str_extract(self, lvalue_formatter):
global bitop_width
s = 's' if self.sign else ''
return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
def referenced_fields(self):
return []
def __eq__(self, other):
return self.sign == other.sign and self.mask == other.mask
@ -228,12 +311,12 @@ def __init__(self, subs, mask):
def __str__(self):
return str(self.subs)
def str_extract(self):
def str_extract(self, lvalue_formatter):
global bitop_width
ret = '0'
pos = 0
for f in reversed(self.subs):
ext = f.str_extract()
ext = f.str_extract(lvalue_formatter)
if pos == 0:
ret = ext
else:
@ -241,6 +324,12 @@ def str_extract(self):
pos += f.len
return ret
def referenced_fields(self):
l = []
for f in self.subs:
l.extend(f.referenced_fields())
return l
def __ne__(self, other):
if len(self.subs) != len(other.subs):
return True
@ -264,9 +353,12 @@ def __init__(self, value):
def __str__(self):
return str(self.value)
def str_extract(self):
def str_extract(self, lvalue_formatter):
return str(self.value)
def referenced_fields(self):
return []
def __cmp__(self, other):
return self.value - other.value
# end ConstField
@ -283,8 +375,12 @@ def __init__(self, func, base):
def __str__(self):
return self.func + '(' + str(self.base) + ')'
def str_extract(self):
return self.func + '(ctx, ' + self.base.str_extract() + ')'
def str_extract(self, lvalue_formatter):
return (self.func + '(ctx, '
+ self.base.str_extract(lvalue_formatter) + ')')
def referenced_fields(self):
return self.base.referenced_fields()
def __eq__(self, other):
return self.func == other.func and self.base == other.base
@ -304,9 +400,12 @@ def __init__(self, func):
def __str__(self):
return self.func
def str_extract(self):
def str_extract(self, lvalue_formatter):
return self.func + '(ctx)'
def referenced_fields(self):
return []
def __eq__(self, other):
return self.func == other.func
@ -314,6 +413,32 @@ def __ne__(self, other):
return not self.__eq__(other)
# end ParameterField
class NamedField:
"""Class representing a field already named in the pattern"""
def __init__(self, name, sign, len):
self.mask = 0
self.sign = sign
self.len = len
self.name = name
def __str__(self):
return self.name
def str_extract(self, lvalue_formatter):
global bitop_width
s = 's' if self.sign else ''
lvalue = lvalue_formatter(self.name)
return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
def referenced_fields(self):
return [self.name]
def __eq__(self, other):
return self.name == other.name
def __ne__(self, other):
return not self.__eq__(other)
# end NamedField
class Arguments:
"""Class representing the extracted fields of a format"""
@ -337,7 +462,6 @@ def output_def(self):
output('} ', self.struct_name(), ';\n\n')
# end Arguments
class General:
"""Common code between instruction formats and instruction patterns"""
def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
@ -351,12 +475,59 @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
self.fieldmask = fldm
self.fields = flds
self.width = w
self.dangling = None
def __str__(self):
return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
def str1(self, i):
return str_indent(i) + self.__str__()
def dangling_references(self):
# Return a list of all named references which aren't satisfied
# directly by this format/pattern. This will be either:
# * a format referring to a field which is specified by the
# pattern(s) using it
# * a pattern referring to a field which is specified by the
# format it uses
# * a user error (referring to a field that doesn't exist at all)
if self.dangling is None:
# Compute this once and cache the answer
dangling = []
for n, f in self.fields.items():
for r in f.referenced_fields():
if r not in self.fields:
dangling.append(r)
self.dangling = dangling
return self.dangling
def output_fields(self, indent, lvalue_formatter):
# We use a topological sort to ensure that any use of NamedField
# comes after the initialization of the field it is referencing.
graph = {}
for n, f in self.fields.items():
refs = f.referenced_fields()
graph[n] = refs
try:
ts = TopologicalSorter(graph)
for n in ts.static_order():
# We only want to emit assignments for the keys
# in our fields list, not for anything that ends up
# in the tsort graph only because it was referenced as
# a NamedField.
try:
f = self.fields[n]
output(indent, lvalue_formatter(n), ' = ',
f.str_extract(lvalue_formatter), ';\n')
except KeyError:
pass
except CycleError as e:
# The second element of args is a list of nodes which form
# a cycle (there might be others too, but only one is reported).
# Pretty-print it to tell the user.
cycle = ' => '.join(e.args[1])
error(self.lineno, 'field definitions form a cycle: ' + cycle)
# end General
@ -370,8 +541,7 @@ def extract_name(self):
def output_extract(self):
output('static void ', self.extract_name(), '(DisasContext *ctx, ',
self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
for n, f in self.fields.items():
output(' a->', n, ' = ', f.str_extract(), ';\n')
self.output_fields(str_indent(4), lambda n: 'a->' + n)
output('}\n\n')
# end Format
@ -392,11 +562,36 @@ def output_code(self, i, extracted, outerbits, outermask):
ind = str_indent(i)
arg = self.base.base.name
output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
# We might have named references in the format that refer to fields
# in the pattern, or named references in the pattern that refer
# to fields in the format. This affects whether we extract the fields
# for the format before or after the ones for the pattern.
# For simplicity we don't allow cross references in both directions.
# This is also where we catch the syntax error of referring to
# a nonexistent field.
fmt_refs = self.base.dangling_references()
for r in fmt_refs:
if r not in self.fields:
error(self.lineno, f'format refers to undefined field {r}')
pat_refs = self.dangling_references()
for r in pat_refs:
if r not in self.base.fields:
error(self.lineno, f'pattern refers to undefined field {r}')
if pat_refs and fmt_refs:
error(self.lineno, ('pattern that uses fields defined in format '
'cannot use format that uses fields defined '
'in pattern'))
if fmt_refs:
# pattern fields first
self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
assert not extracted, "dangling fmt refs but it was already extracted"
if not extracted:
output(ind, self.base.extract_name(),
'(ctx, &u.f_', arg, ', insn);\n')
for n, f in self.fields.items():
output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
if not fmt_refs:
# pattern fields last
self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
output(ind, 'if (', translate_prefix, '_', self.name,
'(ctx, &u.f_', arg, ')) return true;\n')
@ -473,7 +668,7 @@ def build_tree(self):
def prop_format(self):
for p in self.pats:
p.build_tree()
p.prop_format()
def prop_width(self):
width = None
@ -505,6 +700,12 @@ def output_code(self, i, extracted, outerbits, outermask):
output(ind, '}\n')
else:
p.output_code(i, extracted, p.fixedbits, p.fixedmask)
def build_tree(self):
if not self.pats:
error_with_file(self.file, self.lineno, 'empty pattern group')
super().build_tree()
#end IncMultiPattern
@ -536,8 +737,10 @@ def output_code(self, i, extracted, outerbits, outermask):
ind = str_indent(i)
# If we identified all nodes below have the same format,
# extract the fields now.
if not extracted and self.base:
# extract the fields now. But don't do it if the format relies
# on named fields from the insn pattern, as those won't have
# been initialised at this point.
if not extracted and self.base and not self.base.dangling_references():
output(ind, self.base.extract_name(),
'(ctx, &u.f_', self.base.base.name, ', insn);\n')
extracted = True
@ -623,7 +826,7 @@ def __build_tree(pats, outerbits, outermask):
return t
def build_tree(self):
super().prop_format()
super().build_tree()
self.tree = self.__build_tree(self.pats, self.fixedbits,
self.fixedmask)
@ -659,6 +862,7 @@ def parse_field(lineno, name, toks):
"""Parse one instruction field from TOKS at LINENO"""
global fields
global insnwidth
global re_C_ident
# A "simple" field will have only one entry;
# a "multifield" will have several.
@ -673,6 +877,25 @@ def parse_field(lineno, name, toks):
func = func[1]
continue
if re.fullmatch(re_C_ident + ':s[0-9]+', t):
# Signed named field
subtoks = t.split(':')
n = subtoks[0]
le = int(subtoks[1])
f = NamedField(n, True, le)
subs.append(f)
width += le
continue
if re.fullmatch(re_C_ident + ':[0-9]+', t):
# Unsigned named field
subtoks = t.split(':')
n = subtoks[0]
le = int(subtoks[1])
f = NamedField(n, False, le)
subs.append(f)
width += le
continue
if re.fullmatch('[0-9]+:s[0-9]+', t):
# Signed field extract
subtoks = t.split(':s')
@ -1286,11 +1509,12 @@ def main():
global bitop_width
global variablewidth
global anyextern
global testforerror
decode_scope = 'static '
long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
'static-decode=', 'varinsnwidth=']
'static-decode=', 'varinsnwidth=', 'test-for-error']
try:
(opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
except getopt.GetoptError as err:
@ -1319,6 +1543,8 @@ def main():
bitop_width = 64
elif insnwidth != 32:
error(0, 'cannot handle insns of width', insnwidth)
elif o == '--test-for-error':
testforerror = True
else:
assert False, 'unhandled option'
@ -1417,6 +1643,7 @@ def main():
if output_file:
output_fd.close()
exit(1 if testforerror else 0)
# end main

View file

@ -10,11 +10,10 @@
* tcg-target-con-str.h; the constraint combination is inclusive or.
*/
C_O0_I1(r)
C_O0_I2(lZ, l)
C_O0_I2(r, rA)
C_O0_I2(rZ, r)
C_O0_I2(w, r)
C_O1_I1(r, l)
C_O0_I3(rZ, rZ, r)
C_O1_I1(r, r)
C_O1_I1(w, r)
C_O1_I1(w, w)
@ -33,4 +32,5 @@ C_O1_I2(w, w, wO)
C_O1_I2(w, w, wZ)
C_O1_I3(w, w, w, w)
C_O1_I4(r, r, rA, rZ, rZ)
C_O2_I1(r, r, r)
C_O2_I4(r, r, rZ, rZ, rA, rMZ)

View file

@ -9,7 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('l', ALL_QLDST_REGS)
REGS('w', ALL_VECTOR_REGS)
/*

View file

@ -40,11 +40,12 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
TCG_REG_X16, TCG_REG_X17,
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
/* X16 reserved as temporary */
/* X17 reserved as temporary */
/* X18 reserved by system */
/* X19 reserved for AREG0 */
/* X29 reserved as fp */
@ -71,8 +72,10 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
return TCG_REG_X0 + slot;
}
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31
#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
@ -129,14 +132,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS 0xffffffff00000000ull
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
(ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
(1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS ALL_GENERAL_REGS
#endif
/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
@ -390,6 +385,10 @@ typedef enum {
I3305_LDR_v64 = 0x5c000000,
I3305_LDR_v128 = 0x9c000000,
/* Load/store exclusive. */
I3306_LDXP = 0xc8600000,
I3306_STXP = 0xc8200000,
/* Load/store register. Described here as 3.3.12, but the helper
that emits them can transform to 3.3.10 or 3.3.13. */
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
@ -454,6 +453,9 @@ typedef enum {
I3406_ADR = 0x10000000,
I3406_ADRP = 0x90000000,
/* Add/subtract extended register instructions. */
I3501_ADD = 0x0b200000,
/* Add/subtract shifted register instructions (without a shift). */
I3502_ADD = 0x0b000000,
I3502_ADDS = 0x2b000000,
@ -624,6 +626,12 @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}
static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
TCGReg rt, TCGReg rt2, TCGReg rn)
{
tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
TCGReg rt, int imm19)
{
@ -706,6 +714,14 @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}
static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
TCGType sf, TCGReg rd, TCGReg rn,
TCGReg rm, int opt, int imm3)
{
tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
imm3 << 10 | rn << 5 | rd);
}
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
the rare occasion when we actually want to supply a shift amount. */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
@ -984,7 +1000,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
TCGReg temp = TCG_REG_TMP;
TCGReg temp = TCG_REG_TMP0;
if (offset < -0xffffff || offset > 0xffffff) {
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
@ -1136,8 +1152,8 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
}
/* Worst-case scenario, move offset to temp register, use reg offset. */
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
@ -1353,8 +1369,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
if (offset == sextract64(offset, 0, 26)) {
tcg_out_insn(s, 3206, BL, offset);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
}
}
@ -1491,7 +1507,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
AArch64Insn insn;
if (rl == ah || (!const_bh && rl == bh)) {
rl = TCG_REG_TMP;
rl = TCG_REG_TMP0;
}
if (const_bl) {
@ -1508,7 +1524,7 @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
possibility of adding 0+const in the low part, and the
immediate add instructions encode XSP not XZR. Don't try
anything more elaborate here than loading another zero. */
al = TCG_REG_TMP;
al = TCG_REG_TMP0;
tcg_out_movi(s, ext, al, 0);
}
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
@ -1549,7 +1565,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
{
TCGReg a1 = a0;
if (is_ctz) {
a1 = TCG_REG_TMP;
a1 = TCG_REG_TMP0;
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
}
if (const_b && b == (ext ? 64 : 32)) {
@ -1558,7 +1574,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
AArch64Insn sel = I3506_CSEL;
tcg_out_cmp(s, ext, a0, 0, 1);
tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
if (const_b) {
if (b == -1) {
@ -1571,7 +1587,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
b = d;
}
}
tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
}
}
@ -1588,7 +1604,7 @@ bool tcg_target_has_memory_bswap(MemOp memop)
}
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 1, .tmp = { TCG_REG_TMP }
.ntmp = 1, .tmp = { TCG_REG_TMP0 }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@ -1633,19 +1649,19 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGType addr_type = s->addr_type;
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;
h->aa = atom_and_align_for_opc(s, opc,
have_lse2 ? MO_ATOM_WITHIN16
: MO_ATOM_IFALIGN,
false);
s_bits == MO_128);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1u << s_bits) - 1;
unsigned mem_index = get_mmuidx(oi);
TCGReg x3;
TCGReg addr_adj;
TCGType mask_type;
uint64_t compare_mask;
@ -1657,27 +1673,27 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
/* Extract the TLB index from the address into X0. */
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
TCG_REG_X0, TCG_REG_X0, addr_reg,
TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
s->page_bits - CPU_TLB_ENTRY_BITS);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
/* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
/* Load the tlb comparator into X0, and the fast path addend into X1. */
tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
/* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
is_ld ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
offsetof(CPUTLBEntry, addend));
/*
@ -1686,25 +1702,26 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* cross pages using the address of the last byte of the access.
*/
if (a_mask >= s_mask) {
x3 = addr_reg;
addr_adj = addr_reg;
} else {
addr_adj = TCG_REG_TMP2;
tcg_out_insn(s, 3401, ADDI, addr_type,
TCG_REG_X3, addr_reg, s_mask - a_mask);
x3 = TCG_REG_X3;
addr_adj, addr_reg, s_mask - a_mask);
}
compare_mask = (uint64_t)s->page_mask | a_mask;
/* Store the page mask part of the address into X3. */
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
/* Store the page mask part of the address into TMP2. */
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
addr_adj, compare_mask);
/* Perform the address comparison. */
tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
/* If not equal, we jump to the slow path. */
ldst->label_ptr[0] = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
h->base = TCG_REG_X1,
h->base = TCG_REG_TMP1;
h->index = addr_reg;
h->index_ext = addr_type;
#else
@ -1822,6 +1839,108 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
}
}
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst;
HostAddress h;
TCGReg base;
bool use_pair;
ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
/* Compose the final address, as LDP/STP have no indexing. */
if (h.index == TCG_REG_XZR) {
base = h.base;
} else {
base = TCG_REG_TMP2;
if (h.index_ext == TCG_TYPE_I32) {
/* add base, base, index, uxtw */
tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
h.base, h.index, MO_32, 0);
} else {
/* add base, base, index */
tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
}
}
use_pair = h.aa.atom < MO_128 || have_lse2;
if (!use_pair) {
tcg_insn_unit *branch = NULL;
TCGReg ll, lh, sl, sh;
/*
* If we have already checked for 16-byte alignment, that's all
* we need. Otherwise we have determined that misaligned atomicity
* may be handled with two 8-byte loads.
*/
if (h.aa.align < MO_128) {
/*
* TODO: align should be MO_64, so we only need test bit 3,
* which means we could use TBNZ instead of ANDS+B_C.
*/
tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
branch = s->code_ptr;
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
use_pair = true;
}
if (is_ld) {
/*
* 16-byte atomicity without LSE2 requires LDXP+STXP loop:
* ldxp lo, hi, [base]
* stxp t0, lo, hi, [base]
* cbnz t0, .-8
* Require no overlap between data{lo,hi} and base.
*/
if (datalo == base || datahi == base) {
tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
base = TCG_REG_TMP2;
}
ll = sl = datalo;
lh = sh = datahi;
} else {
/*
* 16-byte atomicity without LSE2 requires LDXP+STXP loop:
* 1: ldxp t0, t1, [base]
* stxp t0, lo, hi, [base]
* cbnz t0, 1b
*/
tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
ll = TCG_REG_TMP0;
lh = TCG_REG_TMP1;
sl = datalo;
sh = datahi;
}
tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
if (use_pair) {
/* "b .+8", branching across the one insn of use_pair. */
tcg_out_insn(s, 3206, B, 2);
reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
}
}
if (use_pair) {
if (is_ld) {
tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
} else {
tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
}
}
if (ldst) {
ldst->type = TCG_TYPE_I128;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
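For reference, the LDXP/STXP sequence described in the comments above corresponds to roughly the following hedged, aarch64-only C sketch (not QEMU code; the function name is invented). The store-exclusive writes back the bytes just read, which is why this scheme needs writable pages and, as the tcg-target.h change further below notes, is not usable for user-only emulation without FEAT_LSE2.

#include <stdint.h>

/* Hedged, aarch64-only sketch of a 16-byte atomic load without FEAT_LSE2,
 * mirroring the LDXP/STXP loop the backend emits above.  atomic16_read_ldxp
 * is an invented name. */
static inline void atomic16_read_ldxp(void *ptr, uint64_t *lo, uint64_t *hi)
{
    uint64_t l, h;
    uint32_t fail;

    asm volatile("0: ldxp %[l], %[h], %[mem]\n\t"
                 "stxp %w[f], %[l], %[h], %[mem]\n\t"
                 "cbnz %w[f], 0b"
                 : [mem] "+Q"(*(__uint128_t *)ptr),
                   [l] "=&r"(l), [h] "=&r"(h), [f] "=&r"(fail));
    *lo = l;   /* low doubleword (little-endian host) */
    *hi = h;
}

int main(void)
{
    __uint128_t x = 42;        /* on the stack, hence writable */
    uint64_t lo, hi;

    atomic16_read_ldxp(&x, &lo, &hi);
    return !(lo == 42 && hi == 0);
}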
static const tcg_insn_unit *tb_ret_addr;
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
@ -1847,7 +1966,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
set_jmp_insn_offset(s, which);
tcg_out32(s, I3206_B);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
set_jmp_reset_offset(s, which);
}
@ -1866,7 +1985,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
ptrdiff_t i_offset = i_addr - jmp_rx;
/* Note that we asserted this in range in tcg_out_goto_tb. */
insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
}
qatomic_set((uint32_t *)jmp_rw, insn);
flush_idcache_range(jmp_rx, jmp_rw, 4);
@ -2060,13 +2179,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_rem_i64:
case INDEX_op_rem_i32:
tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
break;
case INDEX_op_remu_i64:
case INDEX_op_remu_i32:
tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
break;
case INDEX_op_shl_i64:
@ -2110,8 +2229,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (c2) {
tcg_out_rotl(s, ext, a0, a1, a2);
} else {
tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
}
break;
@ -2161,6 +2280,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
break;
case INDEX_op_bswap64_i64:
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
@ -2517,8 +2644,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
}
}
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
a2 = TCG_VEC_TMP;
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
a2 = TCG_VEC_TMP0;
}
if (is_scalar) {
insn = cmp_scalar_insn[cond];
@ -2799,12 +2926,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_ld_a64_i32:
case INDEX_op_qemu_ld_a32_i64:
case INDEX_op_qemu_ld_a64_i64:
return C_O1_I1(r, l);
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(r, r, r);
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st_a32_i64:
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(lZ, l);
return C_O0_I2(rZ, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(rZ, rZ, r);
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:
@ -2900,9 +3033,11 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */

View file

@ -16,7 +16,6 @@
#include "host/cpuinfo.h"
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
typedef enum {
@ -131,7 +130,16 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/*
* Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
* which requires writable pages. We must defer to the helper for user-only,
* but in system mode all ram is writable for the host.
*/
#ifdef CONFIG_USER_ONLY
#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
#else
#define TCG_TARGET_HAS_qemu_ldst_i128 1
#endif
#define TCG_TARGET_HAS_v64 1
#define TCG_TARGET_HAS_v128 1

View file

@ -31,7 +31,6 @@ extern int arm_arch;
#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
typedef enum {

View file

@ -91,6 +91,8 @@ static const int tcg_target_reg_alloc_order[] = {
#endif
};
#define TCG_TMP_VEC TCG_REG_XMM5
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
@ -319,6 +321,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
@ -1753,7 +1757,21 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
return have_movbe;
TCGAtomAlign aa;
if (!have_movbe) {
return false;
}
if ((memop & MO_SIZE) < MO_128) {
return true;
}
/*
* Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
* but do allow a pair of 64-bit operations, i.e. MOVBEQ.
*/
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
return aa.atom < MO_128;
}
/*
@ -1781,6 +1799,30 @@ static const TCGLdstHelperParam ldst_helper_param = {
static const TCGLdstHelperParam ldst_helper_param = { };
#endif
static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
TCGReg l, TCGReg h, TCGReg v)
{
int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
/* vpmov{d,q} %v, %l */
tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
/* vpextr{d,q} $1, %v, %h */
tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
tcg_out8(s, 1);
}
static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
TCGReg v, TCGReg l, TCGReg h)
{
int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
/* vmov{d,q} %l, %v */
tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
/* vpinsr{d,q} $1, %h, %v, %v */
tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
tcg_out8(s, 1);
}
/*
* Generate code for the slow path for a load at the end of block
*/
@ -1870,6 +1912,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;
#ifdef CONFIG_SOFTMMU
@ -1880,7 +1923,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
*h = x86_guest_base;
#endif
h->base = addrlo;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
@ -1890,7 +1933,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
TCGType tlbtype = TCG_TYPE_I32;
int trexw = 0, hrexw = 0, tlbrexw = 0;
unsigned mem_index = get_mmuidx(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
int tlb_mask;
@ -2070,6 +2112,72 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
h.base, h.index, 0, h.ofs + 4);
}
break;
case MO_128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
/*
* Without 16-byte atomicity, use integer regs.
* That is where we want the data, and it allows bswaps.
*/
if (h.aa.atom < MO_128) {
if (use_movbe) {
TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
if (h.base == datalo || h.index == datalo) {
tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
datalo, datahi, 0);
tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
datahi, datahi, 8);
} else {
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
h.base, h.index, 0, h.ofs + 8);
}
break;
}
/*
* With 16-byte atomicity, a vector load is required.
* If we already have 16-byte alignment, then VMOVDQA always works.
* Else if VMOVDQU has atomicity with dynamic alignment, use that.
* Else we require a runtime test for alignment for VMOVDQA;
* use VMOVDQU on the unaligned nonatomic path for simplicity.
*/
if (h.aa.align >= MO_128) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else {
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
tcg_out_testi(s, h.base, 15);
tcg_out_jxx(s, JCC_JNE, l1, true);
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_jxx(s, JCC_JMP, l2, true);
tcg_out_label(s, l1);
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_label(s, l2);
}
tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC);
break;
default:
g_assert_not_reached();
}
@ -2140,6 +2248,63 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
h.base, h.index, 0, h.ofs + 4);
}
break;
case MO_128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
/*
* Without 16-byte atomicity, use integer regs.
* That is where we have the data, and it allows bswaps.
*/
if (h.aa.atom < MO_128) {
if (use_movbe) {
TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
h.base, h.index, 0, h.ofs + 8);
break;
}
/*
* With 16-byte atomicity, a vector store is required.
* If we already have 16-byte alignment, then VMOVDQA always works.
* Else if VMOVDQU has atomicity with dynamic alignment, use that.
* Else we require a runtime test for alignment for VMOVDQA;
* use VMOVDQU on the unaligned nonatomic path for simplicity.
*/
tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi);
if (h.aa.align >= MO_128) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
} else {
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
tcg_out_testi(s, h.base, 15);
tcg_out_jxx(s, JCC_JNE, l1, true);
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_jxx(s, JCC_JMP, l2, true);
tcg_out_label(s, l1);
tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
TCG_TMP_VEC, 0,
h.base, h.index, 0, h.ofs);
tcg_out_label(s, l2);
}
break;
default:
g_assert_not_reached();
}
@ -2470,6 +2635,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
break;
case INDEX_op_qemu_st_a64_i32:
case INDEX_op_qemu_st8_a64_i32:
@ -2496,6 +2666,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
break;
OP_32_64(mulu2):
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
@ -3193,6 +3368,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
return C_O2_I1(r, r, L);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
return C_O0_I3(L, L, L);
case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, ri, ri);
@ -3962,6 +4146,7 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
#ifdef _WIN64
/* These are call saved, and we don't save them, so don't use them. */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);

View file

@ -28,7 +28,6 @@
#include "host/cpuinfo.h"
#define TCG_TARGET_INSN_UNIT_SIZE 1
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
#ifdef __x86_64__
# define TCG_TARGET_REG_BITS 64
@ -118,7 +117,6 @@ typedef enum {
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
#define have_avx2 (cpuinfo & CPUINFO_AVX2)
#define have_movbe (cpuinfo & CPUINFO_MOVBE)
#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
/*
* There are interesting instructions in AVX512, so long as we have AVX512VL,
@ -202,7 +200,8 @@ typedef enum {
#define TCG_TARGET_HAS_qemu_st8_i32 1
#endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 \
(TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
/* We do not support older SSE systems, only beginning with AVX1. */
#define TCG_TARGET_HAS_v64 have_avx1

View file

@ -36,7 +36,6 @@
#endif
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define TCG_TARGET_NB_REGS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

View file

@ -14,6 +14,7 @@ C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(v, r)
C_O0_I3(r, r, r)
C_O0_I3(o, m, r)
C_O0_I4(r, r, ri, ri)
C_O0_I4(r, r, r, r)
C_O1_I1(r, r)
@ -34,6 +35,7 @@ C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rZ, rZ)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(r, r, r)
C_O2_I1(o, m, r)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, rI, rZM, r, r)
C_O2_I4(r, r, r, r, rI, rZM)

View file

@ -9,6 +9,7 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
REGS('v', ALL_VECTOR_REGS)
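
A note on the new 'o' constraint above: bit n of a register mask stands for register n, so ANDing ALL_GENERAL_REGS with 0xAAAAAAAAu keeps only the odd-numbered GPRs. A minimal sketch, assuming a hypothetical helper (not part of the backend) and regno < 32:

#include <stdbool.h>

/* 0xAAAAAAAA is binary ...10101010: bits 1, 3, 5, ... are set,
 * so only odd register numbers pass the mask. */
static bool allowed_by_odd_mask(unsigned regno)
{
    return (0xAAAAAAAAu >> regno) & 1;
}

This matches the assertions in the ppc tcg_out_qemu_ldst_i128 further below, where datalo must be odd and datahi the preceding even register, since LQ/STQ operate on an even/odd pair.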
/*

View file

@ -295,25 +295,27 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define B OPCD( 18)
#define BC OPCD( 16)
#define LBZ OPCD( 34)
#define LHZ OPCD( 40)
#define LHA OPCD( 42)
#define LWZ OPCD( 32)
#define LWZUX XO31( 55)
#define STB OPCD( 38)
#define STH OPCD( 44)
#define STW OPCD( 36)
#define STD XO62( 0)
#define STDU XO62( 1)
#define STDX XO31(149)
#define LD XO58( 0)
#define LDX XO31( 21)
#define LDU XO58( 1)
#define LDUX XO31( 53)
#define LWA XO58( 2)
#define LWAX XO31(341)
#define LQ OPCD( 56)
#define STB OPCD( 38)
#define STH OPCD( 44)
#define STW OPCD( 36)
#define STD XO62( 0)
#define STDU XO62( 1)
#define STDX XO31(149)
#define STQ XO62( 2)
#define ADDIC OPCD( 12)
#define ADDI OPCD( 14)
@ -2020,7 +2022,18 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
TCGAtomAlign aa;
if ((memop & MO_SIZE) <= MO_64) {
return true;
}
/*
* Reject 16-byte memop with 16-byte atomicity,
* but do allow a pair of 64-bit operations.
*/
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
return aa.atom <= MO_64;
}
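
In other words, a byte-swapped 16-byte access is only advertised when its atomicity requirement can be met by two 8-byte operations; the LQ/STQ path in tcg_out_qemu_ldst_i128 below asserts !need_bswap, since those instructions have no byte-reversed form.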
/*
@ -2035,7 +2048,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp a_bits;
MemOp a_bits, s_bits;
/*
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
@ -2047,10 +2060,11 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* As of 3.0, "the non-atomic access is performed as described in
* the corresponding list", which matches MO_ATOM_SUBALIGN.
*/
s_bits = opc & MO_SIZE;
h->aa = atom_and_align_for_opc(s, opc,
have_isa_3_00 ? MO_ATOM_SUBALIGN
: MO_ATOM_IFALIGN,
false);
s_bits == MO_128);
a_bits = h->aa.align;
#ifdef CONFIG_SOFTMMU
@ -2060,7 +2074,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
unsigned s_bits = opc & MO_SIZE;
ldst = new_ldst_label(s);
ldst->is_ld = is_ld;
@ -2303,6 +2316,60 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
}
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
TCGLabelQemuLdst *ldst;
HostAddress h;
bool need_bswap;
uint32_t insn;
TCGReg index;
ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
/* Compose the final address, as LQ/STQ have no indexing. */
index = h.index;
if (h.base != 0) {
index = TCG_REG_TMP1;
tcg_out32(s, ADD | TAB(index, h.base, h.index));
}
need_bswap = get_memop(oi) & MO_BSWAP;
if (h.aa.atom == MO_128) {
tcg_debug_assert(!need_bswap);
tcg_debug_assert(datalo & 1);
tcg_debug_assert(datahi == datalo - 1);
insn = is_ld ? LQ : STQ;
tcg_out32(s, insn | TAI(datahi, index, 0));
} else {
TCGReg d1, d2;
if (HOST_BIG_ENDIAN ^ need_bswap) {
d1 = datahi, d2 = datalo;
} else {
d1 = datalo, d2 = datahi;
}
if (need_bswap) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
insn = is_ld ? LDBRX : STDBRX;
tcg_out32(s, insn | TAB(d1, 0, index));
tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
} else {
insn = is_ld ? LD : STD;
tcg_out32(s, insn | TAI(d1, index, 0));
tcg_out32(s, insn | TAI(d2, index, 8));
}
}
if (ldst) {
ldst->type = TCG_TYPE_I128;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
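
The register ordering on the pair path can be restated compactly. A minimal sketch with a hypothetical helper that only models which 64-bit half of the value ends up at the lower address (byte order within each half is handled by the LDBRX/STDBRX choice above):

#include <stdbool.h>
#include <stdint.h>

/* The half placed at offset 0 is the high half exactly when the
 * in-memory order is big-endian, i.e. when HOST_BIG_ENDIAN ^ need_bswap
 * is true; the other half goes at offset 8. */
static uint64_t half_at_offset_0(uint64_t lo, uint64_t hi,
                                 bool host_big_endian, bool need_bswap)
{
    return (host_big_endian ^ need_bswap) ? hi : lo;
}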
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;
@ -2860,6 +2927,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
break;
case INDEX_op_qemu_st_a64_i32:
if (TCG_TARGET_REG_BITS == 32) {
@ -2889,6 +2961,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[4], TCG_TYPE_I64);
}
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
break;
case INDEX_op_setcond_i32:
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
@ -3722,6 +3799,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(o, m, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(o, m, r);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
case INDEX_op_mul_vec:

View file

@ -34,7 +34,6 @@
#define TCG_TARGET_NB_REGS 64
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
typedef enum {
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
@ -149,7 +148,8 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_mulsh_i64 1
#endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 \
(TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
/*
* While technically Altivec could support V64, it has no 64-bit store

View file

@ -35,7 +35,6 @@
#define TCG_TARGET_REG_BITS 64
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
#define TCG_TARGET_NB_REGS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)

View file

@ -14,6 +14,7 @@ C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(r, rA)
C_O0_I2(v, r)
C_O0_I3(o, m, r)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)
@ -36,6 +37,7 @@ C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rI, r)
C_O1_I4(r, r, rA, rI, r)
C_O2_I1(o, m, r)
C_O2_I2(o, m, 0, r)
C_O2_I2(o, m, r, r)
C_O2_I3(o, m, 0, 1, r)

View file

@ -243,6 +243,7 @@ typedef enum S390Opcode {
RXY_LLGF = 0xe316,
RXY_LLGH = 0xe391,
RXY_LMG = 0xeb04,
RXY_LPQ = 0xe38f,
RXY_LRV = 0xe31e,
RXY_LRVG = 0xe30f,
RXY_LRVH = 0xe31f,
@ -253,6 +254,7 @@ typedef enum S390Opcode {
RXY_STG = 0xe324,
RXY_STHY = 0xe370,
RXY_STMG = 0xeb24,
RXY_STPQ = 0xe38e,
RXY_STRV = 0xe33e,
RXY_STRVG = 0xe32f,
RXY_STRVH = 0xe33f,
@ -1577,7 +1579,18 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
return true;
TCGAtomAlign aa;
if ((memop & MO_SIZE) <= MO_64) {
return true;
}
/*
* Reject 16-byte memop with 16-byte atomicity,
* but do allow a pair of 64-bit operations.
*/
aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
return aa.atom <= MO_64;
}
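
As on ppc, this means a byte-reversed 16-byte access is only advertised when it can be performed as two 8-byte operations (the LRVG/STRVG pair in the function below); the LPQ/STPQ path asserts !need_bswap.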
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
@ -1734,13 +1747,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
@ -1865,6 +1878,80 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
}
}
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
TCGLabel *l1 = NULL, *l2 = NULL;
TCGLabelQemuLdst *ldst;
HostAddress h;
bool need_bswap;
bool use_pair;
S390Opcode insn;
ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
use_pair = h.aa.atom < MO_128;
need_bswap = get_memop(oi) & MO_BSWAP;
if (!use_pair) {
/*
* Atomicity requires we use LPQ. If we've already checked for
* 16-byte alignment, that's all we need. If we arrive with
         * lesser alignment, we have already determined that the required
         * atomicity can be satisfied with two 8-byte loads.
*/
if (h.aa.align < MO_128) {
use_pair = true;
l1 = gen_new_label();
l2 = gen_new_label();
tcg_out_insn(s, RI, TMLL, addr_reg, 15);
tgen_branch(s, 7, l1); /* CC in {1,2,3} */
}
tcg_debug_assert(!need_bswap);
tcg_debug_assert(datalo & 1);
tcg_debug_assert(datahi == datalo - 1);
insn = is_ld ? RXY_LPQ : RXY_STPQ;
tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
if (use_pair) {
tgen_branch(s, S390_CC_ALWAYS, l2);
tcg_out_label(s, l1);
}
}
if (use_pair) {
TCGReg d1, d2;
if (need_bswap) {
d1 = datalo, d2 = datahi;
insn = is_ld ? RXY_LRVG : RXY_STRVG;
} else {
d1 = datahi, d2 = datalo;
insn = is_ld ? RXY_LG : RXY_STG;
}
if (h.base == d1 || h.index == d1) {
tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
h.base = TCG_TMP0;
h.index = TCG_REG_NONE;
h.disp = 0;
}
tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
}
if (l2) {
tcg_out_label(s, l2);
}
if (ldst) {
ldst->type = TCG_TYPE_I128;
ldst->datalo_reg = datalo;
ldst->datahi_reg = datahi;
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
}
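
The run-time alignment branch emitted above can be paraphrased in C. This is an illustrative analogue for the big-endian, non-byte-swapped load case only; the helper is hypothetical and byte order within each half is ignored:

#include <stdint.h>
#include <string.h>

static void ld16_sketch(const void *p, uint64_t *hi, uint64_t *lo)
{
    if (((uintptr_t)p & 15) == 0) {
        /* Aligned: stands in for LPQ, a single 16-byte atomic load. */
        unsigned char buf[16];
        memcpy(buf, p, 16);
        memcpy(hi, buf, 8);
        memcpy(lo, buf + 8, 8);
    } else {
        /* Unaligned: only 8-byte atomicity is required, so a pair of
         * LG loads at offsets 0 and 8 suffices. */
        memcpy(hi, p, 8);
        memcpy(lo, (const char *)p + 8, 8);
    }
}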
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
/* Reuse the zeroing that exists for goto_ptr. */
@ -2226,6 +2313,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
break;
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
break;
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
break;
case INDEX_op_ld16s_i64:
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
@ -3107,6 +3202,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
return C_O0_I2(r, r);
case INDEX_op_qemu_ld_a32_i128:
case INDEX_op_qemu_ld_a64_i128:
return C_O2_I1(o, m, r);
case INDEX_op_qemu_st_a32_i128:
case INDEX_op_qemu_st_a64_i128:
return C_O0_I3(o, m, r);
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:

View file

@ -26,7 +26,6 @@
#define S390_TCG_TARGET_H
#define TCG_TARGET_INSN_UNIT_SIZE 2
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
/* We have a +- 4GB range on the branches; leave some slop. */
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
@ -140,7 +139,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_qemu_ldst_i128 1
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)

View file

@ -26,7 +26,6 @@
#define SPARC_TCG_TARGET_H
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
#define TCG_TARGET_NB_REGS 32
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)

View file

@ -5736,8 +5736,8 @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
mov[0].dst = ldst->datalo_reg;
mov[0].src =
tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
mov[0].dst_type = TCG_TYPE_I32;
mov[0].src_type = TCG_TYPE_I32;
mov[0].dst_type = TCG_TYPE_REG;
mov[0].src_type = TCG_TYPE_REG;
mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
mov[1].dst = ldst->datahi_reg;

View file

@ -42,7 +42,6 @@
#define TCG_TARGET_INTERPRETER 1
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
#if UINTPTR_MAX == UINT32_MAX

View file

@ -1,24 +0,0 @@
#!/bin/sh
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.
PYTHON=$1
DECODETREE=$2
E=0
# All of these tests should produce errors
for i in err_*.decode; do
if $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
# Pass, aka failed to fail.
echo FAIL: $i 1>&2
E=1
fi
done
for i in succ_*.decode; do
if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
echo FAIL:$i 1>&2
fi
done
exit $E

View file

@ -0,0 +1,7 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.
# Diagnose formats which refer to undefined fields
%field1 field2:3
@fmt ........ ........ ........ ........ %field1
insn 00000000 00000000 00000000 00000000 @fmt

View file

@ -0,0 +1,7 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.
# Diagnose fields whose definitions form a loop
%field1 field2:3
%field2 field1:4
insn 00000000 00000000 00000000 00000000 %field1 %field2

View file

@ -0,0 +1,8 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.
# Diagnose patterns which refer to undefined fields
&f1 f1 a
%field1 field2:3
@fmt ........ ........ ........ .... a:4 &f1
insn 00000000 00000000 00000000 0000 .... @fmt f1=%field1

View file

@ -0,0 +1,14 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.
# Diagnose fields where the format refers to a field defined in the
# pattern and the pattern refers to a field defined in the format.
# This is theoretically not impossible to implement, but is not
# supported by the script at this time.
&abcd a b c d
%refa a:3
%refc c:4
# Format defines 'c' and sets 'b' to an indirect ref to 'a'
@fmt ........ ........ ........ c:8 &abcd b=%refa
# Pattern defines 'a' and sets 'd' to an indirect ref to 'c'
insn 00000000 00000000 00000000 ........ @fmt d=%refc a=6

tests/decode/meson.build  (new file, 64 lines)
View file

@ -0,0 +1,64 @@
err_tests = [
'err_argset1.decode',
'err_argset2.decode',
'err_field1.decode',
'err_field2.decode',
'err_field3.decode',
'err_field4.decode',
'err_field5.decode',
'err_field6.decode',
'err_field7.decode',
'err_field8.decode',
'err_field9.decode',
'err_field10.decode',
'err_init1.decode',
'err_init2.decode',
'err_init3.decode',
'err_init4.decode',
'err_overlap1.decode',
'err_overlap2.decode',
'err_overlap3.decode',
'err_overlap4.decode',
'err_overlap5.decode',
'err_overlap6.decode',
'err_overlap7.decode',
'err_overlap8.decode',
'err_overlap9.decode',
'err_pattern_group_empty.decode',
'err_pattern_group_ident1.decode',
'err_pattern_group_ident2.decode',
'err_pattern_group_nest1.decode',
'err_pattern_group_nest2.decode',
'err_pattern_group_nest3.decode',
'err_pattern_group_overlap1.decode',
'err_width1.decode',
'err_width2.decode',
'err_width3.decode',
'err_width4.decode',
]
succ_tests = [
'succ_argset_type1.decode',
'succ_function.decode',
'succ_ident1.decode',
'succ_named_field.decode',
'succ_pattern_group_nest1.decode',
'succ_pattern_group_nest2.decode',
'succ_pattern_group_nest3.decode',
'succ_pattern_group_nest4.decode',
]
suite = 'decodetree'
decodetree = find_program(meson.project_source_root() / 'scripts/decodetree.py')
foreach t: err_tests
test(fs.replace_suffix(t, ''),
decodetree, args: ['-o', '/dev/null', '--test-for-error', files(t)],
suite: suite)
endforeach
foreach t: succ_tests
test(fs.replace_suffix(t, ''),
decodetree, args: ['-o', '/dev/null', files(t)],
suite: suite)
endforeach
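
With the cases registered here, the whole suite can be run from a configured build directory with, for example, "meson test --suite decodetree"; the err_*.decode inputs are passed --test-for-error so that decodetree itself reports whether the expected diagnostic was produced.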

View file

@ -0,0 +1,19 @@
# This work is licensed under the terms of the GNU LGPL, version 2 or later.
# See the COPYING.LIB file in the top-level directory.
# field using a named_field
%imm_sz 8:8 sz:3
insn 00000000 00000000 ........ 00000000 imm_sz=%imm_sz sz=1
# Ditto, via a format. Here a field in the format
# references a named field defined in the insn pattern:
&imm_a imm alpha
%foo 0:16 alpha:4
@foo 00000001 ........ ........ ........ &imm_a imm=%foo
i1 ........ 00000000 ........ ........ @foo alpha=1
i2 ........ 00000001 ........ ........ @foo alpha=2
# Here the named field is defined in the format and referenced
# from the insn pattern:
@bar 00000010 ........ ........ ........ &imm_a alpha=4
i3 ........ 00000000 ........ ........ @bar imm=%foo

View file

@ -74,10 +74,7 @@ if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host
dependencies: [qemuutil, vhost_user])
endif
test('decodetree', sh,
args: [ files('decode/check.sh'), config_host['PYTHON'], files('../scripts/decodetree.py') ],
workdir: meson.current_source_dir() / 'decode',
suite: 'decodetree')
subdir('decode')
if 'CONFIG_TCG' in config_all
subdir('fp')