mirror of
https://gitlab.com/qemu-project/qemu
synced 2024-10-14 15:02:54 +00:00
tcg: Implement gvec support for rotate by immediate
No host backend support yet, but the interfaces for rotli are in place. Canonicalize immediate rotate to the left, based on a survey of architectures, but provide both left and right shift interfaces to the translators. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
cccdd8c797
commit
b0f7e7444c
|
@ -716,6 +716,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
|
||||||
clear_high(d, oprsz, desc);
|
clear_high(d, oprsz, desc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t oprsz = simd_oprsz(desc);
|
||||||
|
int shift = simd_data(desc);
|
||||||
|
intptr_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
|
||||||
|
*(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
|
||||||
|
}
|
||||||
|
clear_high(d, oprsz, desc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t oprsz = simd_oprsz(desc);
|
||||||
|
int shift = simd_data(desc);
|
||||||
|
intptr_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
|
||||||
|
*(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
|
||||||
|
}
|
||||||
|
clear_high(d, oprsz, desc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t oprsz = simd_oprsz(desc);
|
||||||
|
int shift = simd_data(desc);
|
||||||
|
intptr_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||||
|
*(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
|
||||||
|
}
|
||||||
|
clear_high(d, oprsz, desc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t oprsz = simd_oprsz(desc);
|
||||||
|
int shift = simd_data(desc);
|
||||||
|
intptr_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||||
|
*(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
|
||||||
|
}
|
||||||
|
clear_high(d, oprsz, desc);
|
||||||
|
}
|
||||||
|
|
||||||
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
|
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t oprsz = simd_oprsz(desc);
|
intptr_t oprsz = simd_oprsz(desc);
|
||||||
|
|
|
@ -259,6 +259,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
|
@ -334,6 +334,10 @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||||
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
|
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||||
|
void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
|
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||||
|
void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
|
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||||
|
|
||||||
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
|
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
|
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
|
||||||
|
@ -388,5 +392,7 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||||
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||||
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||||
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||||
|
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
|
||||||
|
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -999,6 +999,8 @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||||
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||||
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||||
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||||
|
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||||
|
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||||
|
|
||||||
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
|
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
|
||||||
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
|
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
|
||||||
|
|
|
@ -248,6 +248,7 @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
|
||||||
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
|
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
|
||||||
DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
|
DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
|
||||||
DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
|
DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
|
||||||
|
DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
|
||||||
|
|
||||||
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
|
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
|
||||||
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
|
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
|
||||||
|
|
|
@ -182,6 +182,7 @@ typedef uint64_t TCGRegSet;
|
||||||
#define TCG_TARGET_HAS_not_vec 0
|
#define TCG_TARGET_HAS_not_vec 0
|
||||||
#define TCG_TARGET_HAS_andc_vec 0
|
#define TCG_TARGET_HAS_andc_vec 0
|
||||||
#define TCG_TARGET_HAS_orc_vec 0
|
#define TCG_TARGET_HAS_orc_vec 0
|
||||||
|
#define TCG_TARGET_HAS_roti_vec 0
|
||||||
#define TCG_TARGET_HAS_shi_vec 0
|
#define TCG_TARGET_HAS_shi_vec 0
|
||||||
#define TCG_TARGET_HAS_shs_vec 0
|
#define TCG_TARGET_HAS_shs_vec 0
|
||||||
#define TCG_TARGET_HAS_shv_vec 0
|
#define TCG_TARGET_HAS_shv_vec 0
|
||||||
|
|
|
@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
|
||||||
|
|
||||||
* shri_vec v0, v1, i2
|
* shri_vec v0, v1, i2
|
||||||
* sari_vec v0, v1, i2
|
* sari_vec v0, v1, i2
|
||||||
|
* rotli_vec v0, v1, i2
|
||||||
* shrs_vec v0, v1, s2
|
* shrs_vec v0, v1, s2
|
||||||
* sars_vec v0, v1, s2
|
* sars_vec v0, v1, s2
|
||||||
|
|
||||||
Similarly for logical and arithmetic right shift.
|
Similarly for logical and arithmetic right shift, and left rotate.
|
||||||
|
|
||||||
* shlv_vec v0, v1, v2
|
* shlv_vec v0, v1, v2
|
||||||
|
|
||||||
|
|
|
@ -133,6 +133,7 @@ typedef enum {
|
||||||
#define TCG_TARGET_HAS_not_vec 1
|
#define TCG_TARGET_HAS_not_vec 1
|
||||||
#define TCG_TARGET_HAS_neg_vec 1
|
#define TCG_TARGET_HAS_neg_vec 1
|
||||||
#define TCG_TARGET_HAS_abs_vec 1
|
#define TCG_TARGET_HAS_abs_vec 1
|
||||||
|
#define TCG_TARGET_HAS_roti_vec 0
|
||||||
#define TCG_TARGET_HAS_shi_vec 1
|
#define TCG_TARGET_HAS_shi_vec 1
|
||||||
#define TCG_TARGET_HAS_shs_vec 0
|
#define TCG_TARGET_HAS_shs_vec 0
|
||||||
#define TCG_TARGET_HAS_shv_vec 1
|
#define TCG_TARGET_HAS_shv_vec 1
|
||||||
|
|
|
@ -183,6 +183,7 @@ extern bool have_avx2;
|
||||||
#define TCG_TARGET_HAS_not_vec 0
|
#define TCG_TARGET_HAS_not_vec 0
|
||||||
#define TCG_TARGET_HAS_neg_vec 0
|
#define TCG_TARGET_HAS_neg_vec 0
|
||||||
#define TCG_TARGET_HAS_abs_vec 1
|
#define TCG_TARGET_HAS_abs_vec 1
|
||||||
|
#define TCG_TARGET_HAS_roti_vec 0
|
||||||
#define TCG_TARGET_HAS_shi_vec 1
|
#define TCG_TARGET_HAS_shi_vec 1
|
||||||
#define TCG_TARGET_HAS_shs_vec 1
|
#define TCG_TARGET_HAS_shs_vec 1
|
||||||
#define TCG_TARGET_HAS_shv_vec have_avx2
|
#define TCG_TARGET_HAS_shv_vec have_avx2
|
||||||
|
|
|
@ -161,6 +161,7 @@ extern bool have_vsx;
|
||||||
#define TCG_TARGET_HAS_not_vec 1
|
#define TCG_TARGET_HAS_not_vec 1
|
||||||
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
|
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
|
||||||
#define TCG_TARGET_HAS_abs_vec 0
|
#define TCG_TARGET_HAS_abs_vec 0
|
||||||
|
#define TCG_TARGET_HAS_roti_vec 0
|
||||||
#define TCG_TARGET_HAS_shi_vec 0
|
#define TCG_TARGET_HAS_shi_vec 0
|
||||||
#define TCG_TARGET_HAS_shs_vec 0
|
#define TCG_TARGET_HAS_shs_vec 0
|
||||||
#define TCG_TARGET_HAS_shv_vec 1
|
#define TCG_TARGET_HAS_shv_vec 1
|
||||||
|
|
|
@ -2694,6 +2694,74 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Expand a left-rotate-by-immediate of the 8-bit lanes packed in an
 * i64, as shift/mask/or over the whole register.
 * NOTE: clobbers @a (it is shifted and masked in place).
 * Assumes @c is in [0, 8) — callers (tcg_gen_gvec_rotli) assert this.
 */
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    /* Bits that legitimately belong to each lane after the left shift. */
    uint64_t keep = dup_const(MO_8, 0xff << c);

    /* d = low part of each rotated lane; must read a before clobbering it. */
    tcg_gen_shli_i64(d, a, c);
    /* a = bits wrapped around from the top of each lane. */
    tcg_gen_shri_i64(a, a, 8 - c);
    tcg_gen_andi_i64(a, a, ~keep);
    tcg_gen_andi_i64(d, d, keep);
    tcg_gen_or_i64(d, d, a);
}
|
||||||
|
|
||||||
|
/*
 * Expand a left-rotate-by-immediate of the 16-bit lanes packed in an
 * i64, as shift/mask/or over the whole register.
 * NOTE: clobbers @a (it is shifted and masked in place).
 * Assumes @c is in [0, 16) — callers (tcg_gen_gvec_rotli) assert this.
 */
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    /* Bits that legitimately belong to each lane after the left shift. */
    uint64_t keep = dup_const(MO_16, 0xffff << c);

    /* d = low part of each rotated lane; must read a before clobbering it. */
    tcg_gen_shli_i64(d, a, c);
    /* a = bits wrapped around from the top of each lane. */
    tcg_gen_shri_i64(a, a, 16 - c);
    tcg_gen_andi_i64(a, a, ~keep);
    tcg_gen_andi_i64(d, d, keep);
    tcg_gen_or_i64(d, d, a);
}
|
||||||
|
|
||||||
|
void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
|
int64_t shift, uint32_t oprsz, uint32_t maxsz)
|
||||||
|
{
|
||||||
|
static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
|
||||||
|
static const GVecGen2i g[4] = {
|
||||||
|
{ .fni8 = tcg_gen_vec_rotl8i_i64,
|
||||||
|
.fniv = tcg_gen_rotli_vec,
|
||||||
|
.fno = gen_helper_gvec_rotl8i,
|
||||||
|
.opt_opc = vecop_list,
|
||||||
|
.vece = MO_8 },
|
||||||
|
{ .fni8 = tcg_gen_vec_rotl16i_i64,
|
||||||
|
.fniv = tcg_gen_rotli_vec,
|
||||||
|
.fno = gen_helper_gvec_rotl16i,
|
||||||
|
.opt_opc = vecop_list,
|
||||||
|
.vece = MO_16 },
|
||||||
|
{ .fni4 = tcg_gen_rotli_i32,
|
||||||
|
.fniv = tcg_gen_rotli_vec,
|
||||||
|
.fno = gen_helper_gvec_rotl32i,
|
||||||
|
.opt_opc = vecop_list,
|
||||||
|
.vece = MO_32 },
|
||||||
|
{ .fni8 = tcg_gen_rotli_i64,
|
||||||
|
.fniv = tcg_gen_rotli_vec,
|
||||||
|
.fno = gen_helper_gvec_rotl64i,
|
||||||
|
.opt_opc = vecop_list,
|
||||||
|
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
|
||||||
|
.vece = MO_64 },
|
||||||
|
};
|
||||||
|
|
||||||
|
tcg_debug_assert(vece <= MO_64);
|
||||||
|
tcg_debug_assert(shift >= 0 && shift < (8 << vece));
|
||||||
|
if (shift == 0) {
|
||||||
|
tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
|
||||||
|
} else {
|
||||||
|
tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||||
|
int64_t shift, uint32_t oprsz, uint32_t maxsz)
|
||||||
|
{
|
||||||
|
tcg_debug_assert(vece <= MO_64);
|
||||||
|
tcg_debug_assert(shift >= 0 && shift < (8 << vece));
|
||||||
|
tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
|
||||||
|
oprsz, maxsz);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Specialized generation vector shifts by a non-constant scalar.
|
* Specialized generation vector shifts by a non-constant scalar.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -545,6 +545,18 @@ void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
|
||||||
do_shifti(INDEX_op_sari_vec, vece, r, a, i);
|
do_shifti(INDEX_op_sari_vec, vece, r, a, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Emit a rotate-left-by-immediate vector opcode: r = rotl(a, i). */
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}
|
||||||
|
|
||||||
|
/*
 * Emit a rotate-right-by-immediate vector opcode: r = rotr(a, i).
 * Only left rotate exists as an opcode; rotate right by i is emitted
 * as rotate left by (width - i) mod width.
 */
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int width = 8 << vece;

    tcg_debug_assert(i >= 0 && i < width);
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (width - 1));
}
|
||||||
|
|
||||||
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
|
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
|
||||||
TCGv_vec r, TCGv_vec a, TCGv_vec b)
|
TCGv_vec r, TCGv_vec a, TCGv_vec b)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1661,6 +1661,8 @@ bool tcg_op_supported(TCGOpcode op)
|
||||||
case INDEX_op_shrv_vec:
|
case INDEX_op_shrv_vec:
|
||||||
case INDEX_op_sarv_vec:
|
case INDEX_op_sarv_vec:
|
||||||
return have_vec && TCG_TARGET_HAS_shv_vec;
|
return have_vec && TCG_TARGET_HAS_shv_vec;
|
||||||
|
case INDEX_op_rotli_vec:
|
||||||
|
return have_vec && TCG_TARGET_HAS_roti_vec;
|
||||||
case INDEX_op_ssadd_vec:
|
case INDEX_op_ssadd_vec:
|
||||||
case INDEX_op_usadd_vec:
|
case INDEX_op_usadd_vec:
|
||||||
case INDEX_op_sssub_vec:
|
case INDEX_op_sssub_vec:
|
||||||
|
|
Loading…
Reference in a new issue