mirror of
https://gitlab.com/qemu-project/qemu
synced 2024-11-05 20:35:44 +00:00
tcg/ppc: Update vector support for VSX
The VSX instruction set includes double-word loads and stores, double-word load and splat, double-word permute, and bit select, all of which require multiple operations in the Altivec instruction set. Because the VSX registers map %vsr32 to %vr0, and we have no current intention or need to use vector registers outside %vr0-%vr19, force on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't have to otherwise modify the VR[TABC] macros. Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
This commit is contained in:
parent
68f340d4cd
commit
47c906ae6f
2 changed files with 51 additions and 6 deletions
|
@ -66,6 +66,7 @@ typedef enum {
|
|||
|
||||
extern TCGPowerISA have_isa;
|
||||
extern bool have_altivec;
|
||||
extern bool have_vsx;
|
||||
|
||||
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
|
||||
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
|
||||
|
@ -149,7 +150,7 @@ extern bool have_altivec;
|
|||
* instruction and substituting two 32-bit stores makes the generated
|
||||
* code quite large.
|
||||
*/
|
||||
#define TCG_TARGET_HAS_v64 0
|
||||
#define TCG_TARGET_HAS_v64 have_vsx
|
||||
#define TCG_TARGET_HAS_v128 have_altivec
|
||||
#define TCG_TARGET_HAS_v256 0
|
||||
|
||||
|
@ -165,7 +166,7 @@ extern bool have_altivec;
|
|||
#define TCG_TARGET_HAS_mul_vec 1
|
||||
#define TCG_TARGET_HAS_sat_vec 1
|
||||
#define TCG_TARGET_HAS_minmax_vec 1
|
||||
#define TCG_TARGET_HAS_bitsel_vec 0
|
||||
#define TCG_TARGET_HAS_bitsel_vec have_vsx
|
||||
#define TCG_TARGET_HAS_cmpsel_vec 0
|
||||
|
||||
void flush_icache_range(uintptr_t start, uintptr_t stop);
|
||||
|
|
|
@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr;
|
|||
TCGPowerISA have_isa;
|
||||
static bool have_isel;
|
||||
bool have_altivec;
|
||||
bool have_vsx;
|
||||
|
||||
#ifndef CONFIG_SOFTMMU
|
||||
#define TCG_GUEST_BASE_REG 30
|
||||
|
@ -467,9 +468,12 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||
#define LVEBX XO31(7)
|
||||
#define LVEHX XO31(39)
|
||||
#define LVEWX XO31(71)
|
||||
/*
 * VSX double-word load, used for TCG_TYPE_V64 loads when have_vsx is set.
 * The low bit (tx) is forced on so the target is in %vsr32 and up, which
 * alias the Altivec %vr registers; VRT() can then encode the register
 * unchanged.
 */
#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
|
||||
/*
 * VSX double-word load and splat, used for 64-bit element dup-from-memory
 * and for V128 constant-pool loads when have_vsx is set.  The low bit (tx)
 * is forced on so the target is in %vsr32 and up, which alias the Altivec
 * %vr registers.
 */
#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
|
||||
|
||||
#define STVX XO31(231)
|
||||
#define STVEWX XO31(199)
|
||||
/*
 * VSX double-word store, used for TCG_TYPE_V64 stores when have_vsx is set.
 * The low bit (sx) is forced on so the source is in %vsr32 and up, which
 * alias the Altivec %vr registers.
 */
#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
|
||||
|
||||
#define VADDSBS VX4(768)
|
||||
#define VADDUBS VX4(512)
|
||||
|
@ -558,6 +562,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||
|
||||
#define VSLDOI VX4(44)
|
||||
|
||||
/*
 * VSX double-word permute, used to duplicate a 64-bit element across a
 * vector when have_vsx is set.  The low three bits (7) force ax=bx=tx=1 so
 * that both sources and the target are in %vsr32 and up, which alias the
 * Altivec %vr registers; VRT()/VRA()/VRB() can then be used unchanged.
 */
#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
|
||||
/*
 * VSX bit select, used to implement INDEX_op_bitsel_vec when have_vsx is
 * set.  The low four bits (0xf) force ax=bx=cx=tx=1 so that all three
 * sources and the target are in %vsr32 and up, which alias the Altivec %vr
 * registers; VRT()/VRA()/VRB()/VRC() can then be used unchanged.
 */
#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
|
||||
|
||||
#define RT(r) ((r)<<21)
|
||||
#define RS(r) ((r)<<21)
|
||||
#define RA(r) ((r)<<16)
|
||||
|
@ -884,11 +891,21 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
|
|||
add = 0;
|
||||
}
|
||||
|
||||
load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
new_pool_l2(s, rel, s->code_ptr, add, val, val);
|
||||
if (have_vsx) {
|
||||
load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
|
||||
load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
new_pool_label(s, val, rel, s->code_ptr, add);
|
||||
} else {
|
||||
new_pool_l2(s, rel, s->code_ptr, add, val, val);
|
||||
}
|
||||
} else {
|
||||
new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
|
||||
load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
new_pool_l2(s, rel, s->code_ptr, add, val, val);
|
||||
} else {
|
||||
new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
|
||||
}
|
||||
}
|
||||
|
||||
if (USE_REG_TB) {
|
||||
|
@ -1136,6 +1153,10 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
|
|||
/* fallthru */
|
||||
case TCG_TYPE_V64:
|
||||
tcg_debug_assert(ret >= TCG_REG_V0);
|
||||
if (have_vsx) {
|
||||
tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
|
||||
break;
|
||||
}
|
||||
tcg_debug_assert((offset & 7) == 0);
|
||||
tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
|
||||
if (offset & 8) {
|
||||
|
@ -1180,6 +1201,10 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
|
|||
/* fallthru */
|
||||
case TCG_TYPE_V64:
|
||||
tcg_debug_assert(arg >= TCG_REG_V0);
|
||||
if (have_vsx) {
|
||||
tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
|
||||
break;
|
||||
}
|
||||
tcg_debug_assert((offset & 7) == 0);
|
||||
if (offset & 8) {
|
||||
tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
|
||||
|
@ -2899,6 +2924,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
|
|||
case INDEX_op_shri_vec:
|
||||
case INDEX_op_sari_vec:
|
||||
return vece <= MO_32 ? -1 : 0;
|
||||
case INDEX_op_bitsel_vec:
|
||||
return have_vsx;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@ -2925,6 +2952,10 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
|
|||
tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
|
||||
break;
|
||||
case MO_64:
|
||||
if (have_vsx) {
|
||||
tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
|
||||
break;
|
||||
}
|
||||
tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
|
||||
tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
|
||||
break;
|
||||
|
@ -2968,6 +2999,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
|
|||
tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
|
||||
break;
|
||||
case MO_64:
|
||||
if (have_vsx) {
|
||||
tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
|
||||
break;
|
||||
}
|
||||
tcg_debug_assert((offset & 7) == 0);
|
||||
tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
|
||||
tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
|
||||
|
@ -3102,6 +3137,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
|
|||
}
|
||||
break;
|
||||
|
||||
case INDEX_op_bitsel_vec:
|
||||
tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
|
||||
return;
|
||||
|
||||
case INDEX_op_dup2_vec:
|
||||
assert(TCG_TARGET_REG_BITS == 32);
|
||||
/* With inputs a1 = xLxx, a2 = xHxx */
|
||||
|
@ -3497,6 +3536,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
|
|||
case INDEX_op_st_vec:
|
||||
case INDEX_op_dupm_vec:
|
||||
return &v_r;
|
||||
case INDEX_op_bitsel_vec:
|
||||
case INDEX_op_ppc_msum_vec:
|
||||
return &v_v_v_v;
|
||||
|
||||
|
@ -3530,6 +3570,10 @@ static void tcg_target_init(TCGContext *s)
|
|||
|
||||
if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
|
||||
have_altivec = true;
|
||||
/* We only care about the portion of VSX that overlaps Altivec. */
|
||||
if (hwcap & PPC_FEATURE_HAS_VSX) {
|
||||
have_vsx = true;
|
||||
}
|
||||
}
|
||||
|
||||
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
|
||||
|
|
Loading…
Reference in a new issue