target/loongarch: Implement vreplve vpack vpick

This patch includes:
- VREPLVE[I].{B/H/W/D};
- VBSLL.V, VBSRL.V;
- VPACK{EV/OD}.{B/H/W/D};
- VPICK{EV/OD}.{B/H/W/D}.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230504122810.4094787-40-gaosong@loongson.cn>
This commit is contained in:
Song Gao 2023-05-04 20:28:05 +08:00
parent cdbdefbf5a
commit d5e5563cb3
No known key found for this signature in database
GPG key ID: 40A2FFF239263EDF
5 changed files with 319 additions and 0 deletions

View file

@ -833,6 +833,11 @@ static void output_vr(DisasContext *ctx, arg_vr *a, const char *mnemonic)
output(ctx, mnemonic, "v%d, r%d", a->vd, a->rj);
}
/*
 * Print the operands of an instruction in the "vvr" form: destination
 * vector register, source vector register, then a general register.
 */
static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
{
    output(ctx, mnemonic, "v%d, v%d, r%d",
           a->vd,
           a->vj,
           a->rk);
}
/* Vector add, byte/half-word/word variants; three vector-register (vvv) form. */
INSN_LSX(vadd_b, vvv)
INSN_LSX(vadd_h, vvv)
INSN_LSX(vadd_w, vvv)
@ -1594,3 +1599,33 @@ INSN_LSX(vreplgr2vr_b, vr)
INSN_LSX(vreplgr2vr_h, vr)
INSN_LSX(vreplgr2vr_w, vr)
INSN_LSX(vreplgr2vr_d, vr)
INSN_LSX(vreplve_b, vvr)
INSN_LSX(vreplve_h, vvr)
INSN_LSX(vreplve_w, vvr)
INSN_LSX(vreplve_d, vvr)
INSN_LSX(vreplvei_b, vv_i)
INSN_LSX(vreplvei_h, vv_i)
INSN_LSX(vreplvei_w, vv_i)
INSN_LSX(vreplvei_d, vv_i)
INSN_LSX(vbsll_v, vv_i)
INSN_LSX(vbsrl_v, vv_i)
INSN_LSX(vpackev_b, vvv)
INSN_LSX(vpackev_h, vvv)
INSN_LSX(vpackev_w, vvv)
INSN_LSX(vpackev_d, vvv)
INSN_LSX(vpackod_b, vvv)
INSN_LSX(vpackod_h, vvv)
INSN_LSX(vpackod_w, vvv)
INSN_LSX(vpackod_d, vvv)
INSN_LSX(vpickev_b, vvv)
INSN_LSX(vpickev_h, vvv)
INSN_LSX(vpickev_w, vvv)
INSN_LSX(vpickev_d, vvv)
INSN_LSX(vpickod_b, vvv)
INSN_LSX(vpickod_h, vvv)
INSN_LSX(vpickod_w, vvv)
INSN_LSX(vpickod_d, vvv)

View file

@ -653,3 +653,21 @@ DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
/*
 * VPACK{EV/OD}.{B/H/W/D}: no return value; arguments are env followed by
 * three 32-bit values, which the helper definitions receive as the
 * vd, vj and vk register numbers.
 */
DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
/* VPICK{EV/OD}.{B/H/W/D}: same signature as the VPACK helpers. */
DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)

View file

@ -3933,3 +3933,147 @@ TRANS(vreplgr2vr_b, gvec_dup, MO_8)
/* VREPLGR2VR.{H/W/D}: translated by gvec_dup with the matching element size. */
TRANS(vreplgr2vr_h, gvec_dup, MO_16)
TRANS(vreplgr2vr_w, gvec_dup, MO_32)
TRANS(vreplgr2vr_d, gvec_dup, MO_64)
/*
 * VREPLVEI.B: replicate the byte element of vj selected by the immediate
 * into vd, using a gvec "dup from memory" of the element's location
 * inside the CPU state.
 */
static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
{
    uint32_t dst_ofs, elem_ofs;

    CHECK_SXE;

    dst_ofs = vec_full_offset(a->vd);
    /* The B() accessor resolves the element's position within the register. */
    elem_ofs = offsetof(CPULoongArchState, fpr[a->vj].vreg.B((a->imm)));

    tcg_gen_gvec_dup_mem(MO_8, dst_ofs, elem_ofs, 16, ctx->vl / 8);
    return true;
}
/*
 * VREPLVEI.H: replicate the 16-bit element of vj selected by the immediate
 * into vd, using a gvec "dup from memory" of the element's location
 * inside the CPU state.
 */
static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
{
    uint32_t dst_ofs, elem_ofs;

    CHECK_SXE;

    dst_ofs = vec_full_offset(a->vd);
    /* The H() accessor resolves the element's position within the register. */
    elem_ofs = offsetof(CPULoongArchState, fpr[a->vj].vreg.H((a->imm)));

    tcg_gen_gvec_dup_mem(MO_16, dst_ofs, elem_ofs, 16, ctx->vl / 8);
    return true;
}
/*
 * VREPLVEI.W: replicate the 32-bit element of vj selected by the immediate
 * into vd, using a gvec "dup from memory" of the element's location
 * inside the CPU state.
 */
static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
{
    uint32_t dst_ofs, elem_ofs;

    CHECK_SXE;

    dst_ofs = vec_full_offset(a->vd);
    /* The W() accessor resolves the element's position within the register. */
    elem_ofs = offsetof(CPULoongArchState, fpr[a->vj].vreg.W((a->imm)));

    tcg_gen_gvec_dup_mem(MO_32, dst_ofs, elem_ofs, 16, ctx->vl / 8);
    return true;
}
/*
 * VREPLVEI.D: replicate the 64-bit element of vj selected by the immediate
 * into vd, using a gvec "dup from memory" of the element's location
 * inside the CPU state.
 */
static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
{
    uint32_t dst_ofs, elem_ofs;

    CHECK_SXE;

    dst_ofs = vec_full_offset(a->vd);
    /* The D() accessor resolves the element's position within the register. */
    elem_ofs = offsetof(CPULoongArchState, fpr[a->vj].vreg.D((a->imm)));

    tcg_gen_gvec_dup_mem(MO_64, dst_ofs, elem_ofs, 16, ctx->vl / 8);
    return true;
}
/*
 * Common translator for VREPLVE.{B/H/W/D}: replicate the element of vj
 * whose index is held in general register rk into every element of vd.
 *
 * @vece: log2 of the element size in bytes (MO_8 .. MO_64).
 * @bit:  element width in bits; LSX_LEN / bit is the number of elements.
 * @func: TCG load emitter matching the element size; it loads the selected
 *        element from (pointer + constant offset) into a 64-bit temp.
 *
 * Always returns true (the instruction is handled, possibly by raising the
 * exception emitted by CHECK_SXE).
 */
static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
                        void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_ptr t1 = tcg_temp_new_ptr();
    TCGv_i64 t2 = tcg_temp_new_i64();

    CHECK_SXE;

    /* Element index = rk modulo the number of elements in the register. */
    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
    /* Scale the index to a byte offset inside the 128-bit register. */
    tcg_gen_shli_i64(t0, t0, vece);
    if (HOST_BIG_ENDIAN) {
        /*
         * Mirror the element index by flipping all of its bits, which in
         * byte-offset terms is an XOR with (nelem - 1) << vece.  The shift
         * operands must be in this order: "vece << (nelem - 1)" does not
         * produce a valid in-register offset mask.
         * NOTE(review): assumes the VReg B/H/W/D accessors store elements
         * in reversed order on big-endian hosts -- confirm against the
         * accessor definitions.
         */
        tcg_gen_xori_i64(t0, t0, ((LSX_LEN / bit) - 1) << vece);
    }

    /* t1 = env + element byte offset; vj's register offset is added below. */
    tcg_gen_trunc_i64_ptr(t1, t0);
    tcg_gen_add_ptr(t1, t1, cpu_env);
    func(t2, t1, vec_full_offset(a->vj));
    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl / 8, t2);

    return true;
}
/*
 * VREPLVE.{B/H/W/D}: each entry passes gen_vreplve the element size code,
 * the element width in bits and the TCG load of the matching width.
 */
TRANS(vreplve_b, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
TRANS(vreplve_h, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
TRANS(vreplve_w, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
TRANS(vreplve_d, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
/*
 * VBSLL.V: shift the 128-bit value in vj left by (imm & 0xf) bytes and
 * write the result to vd.  The register is handled as two 64-bit halves
 * (index 0 = low, index 1 = high); vacated low bits are zero.
 */
static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
{
    TCGv_i64 res_hi, res_lo, src_hi, src_lo;
    int nbits;

    CHECK_SXE;

    res_hi = tcg_temp_new_i64();
    res_lo = tcg_temp_new_i64();
    src_hi = tcg_temp_new_i64();
    src_lo = tcg_temp_new_i64();

    /* Only the low four bits of the immediate are used: 0..15 bytes. */
    nbits = (a->imm & 0xf) * 8;

    get_vreg64(src_lo, a->vj, 0);

    if (nbits >= 64) {
        /* The whole result comes from the low source half. */
        tcg_gen_shli_i64(res_hi, src_lo, nbits - 64);
        res_lo = tcg_constant_i64(0);
    } else {
        get_vreg64(src_hi, a->vj, 1);
        /* High half = bits [64 - nbits, 128 - nbits) of src_hi:src_lo. */
        tcg_gen_extract2_i64(res_hi, src_lo, src_hi, 64 - nbits);
        tcg_gen_shli_i64(res_lo, src_lo, nbits);
    }

    /* All reads of vj are done, so vd may alias vj. */
    set_vreg64(res_hi, a->vd, 1);
    set_vreg64(res_lo, a->vd, 0);

    return true;
}
/*
 * VBSRL.V: shift the 128-bit value in vj right by (imm & 0xf) bytes and
 * write the result to vd.  The register is handled as two 64-bit halves
 * (index 0 = low, index 1 = high); vacated high bits are zero.
 */
static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
{
    TCGv_i64 res_hi, res_lo, src_hi, src_lo;
    int nbits;

    CHECK_SXE;

    res_hi = tcg_temp_new_i64();
    res_lo = tcg_temp_new_i64();
    src_hi = tcg_temp_new_i64();
    src_lo = tcg_temp_new_i64();

    /* Only the low four bits of the immediate are used: 0..15 bytes. */
    nbits = (a->imm & 0xf) * 8;

    get_vreg64(src_hi, a->vj, 1);

    if (nbits >= 64) {
        /* The whole result comes from the high source half. */
        tcg_gen_shri_i64(res_lo, src_hi, nbits - 64);
        res_hi = tcg_constant_i64(0);
    } else {
        get_vreg64(src_lo, a->vj, 0);
        /* Low half = bits [nbits, nbits + 64) of src_hi:src_lo. */
        tcg_gen_extract2_i64(res_lo, src_lo, src_hi, nbits);
        tcg_gen_shri_i64(res_hi, src_hi, nbits);
    }

    /* All reads of vj are done, so vd may alias vj. */
    set_vreg64(res_hi, a->vd, 1);
    set_vreg64(res_lo, a->vd, 0);

    return true;
}
/* VPACK{EV/OD}.{B/H/W/D}: three-register form, implemented by out-of-line helpers. */
TRANS(vpackev_b, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, gen_vvv, gen_helper_vpackev_w)
TRANS(vpackev_d, gen_vvv, gen_helper_vpackev_d)
TRANS(vpackod_b, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, gen_vvv, gen_helper_vpackod_d)
/* VPICK{EV/OD}.{B/H/W/D}: three-register form, implemented by out-of-line helpers. */
TRANS(vpickev_b, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, gen_vvv, gen_helper_vpickev_h)
TRANS(vpickev_w, gen_vvv, gen_helper_vpickev_w)
TRANS(vpickev_d, gen_vvv, gen_helper_vpickev_d)
TRANS(vpickod_b, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, gen_vvv, gen_helper_vpickod_d)

View file

@ -499,6 +499,7 @@ dbcl 0000 00000010 10101 ............... @i15
&vr_i vd rj imm
&rv_i rd vj imm
&vr vd rj
# Two vector registers (vd, vj) plus one general register (rk).
&vvr vd vj rk
#
# LSX Formats
@ -506,6 +507,8 @@ dbcl 0000 00000010 10101 ............... @i15
@vv .... ........ ..... ..... vj:5 vd:5 &vv
@cv .... ........ ..... ..... vj:5 .. cd:3 &cv
@vvv .... ........ ..... vk:5 vj:5 vd:5 &vvv
# vd, vj and an unsigned immediate of 1 to 5 bits; all share the &vv_i set.
@vv_ui1 .... ........ ..... .... imm:1 vj:5 vd:5 &vv_i
@vv_ui2 .... ........ ..... ... imm:2 vj:5 vd:5 &vv_i
@vv_ui3 .... ........ ..... .. imm:3 vj:5 vd:5 &vv_i
@vv_ui4 .... ........ ..... . imm:4 vj:5 vd:5 &vv_i
@vv_ui5 .... ........ ..... imm:5 vj:5 vd:5 &vv_i
@ -524,6 +527,7 @@ dbcl 0000 00000010 10101 ............... @i15
@rv_ui2 .... ........ ..... ... imm:2 vj:5 rd:5 &rv_i
@rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i
@vr .... ........ ..... ..... rj:5 vd:5 &vr
# Same field positions as @vvv, but bits [14:10] name a general register rk.
@vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr
# Vector add patterns; three vector-register operands (@vvv).
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
@ -1197,3 +1201,33 @@ vreplgr2vr_b 0111 00101001 11110 00000 ..... ..... @vr
vreplgr2vr_h 0111 00101001 11110 00001 ..... ..... @vr
vreplgr2vr_w 0111 00101001 11110 00010 ..... ..... @vr
vreplgr2vr_d 0111 00101001 11110 00011 ..... ..... @vr
# VREPLVE.{B/H/W/D}: element index taken from general register rk.
vreplve_b 0111 00010010 00100 ..... ..... ..... @vvr
vreplve_h 0111 00010010 00101 ..... ..... ..... @vvr
vreplve_w 0111 00010010 00110 ..... ..... ..... @vvr
vreplve_d 0111 00010010 00111 ..... ..... ..... @vvr
# VREPLVEI.{B/H/W/D}: element index is an immediate; its width shrinks from
# 4 bits to 1 bit as the element size grows, and the freed bits extend the opcode.
vreplvei_b 0111 00101111 01111 0 .... ..... ..... @vv_ui4
vreplvei_h 0111 00101111 01111 10 ... ..... ..... @vv_ui3
vreplvei_w 0111 00101111 01111 110 .. ..... ..... @vv_ui2
vreplvei_d 0111 00101111 01111 1110 . ..... ..... @vv_ui1
# VBSLL.V / VBSRL.V: whole-register byte shifts with a 5-bit immediate field.
vbsll_v 0111 00101000 11100 ..... ..... ..... @vv_ui5
vbsrl_v 0111 00101000 11101 ..... ..... ..... @vv_ui5
# VPACK{EV/OD}.{B/H/W/D}
vpackev_b 0111 00010001 01100 ..... ..... ..... @vvv
vpackev_h 0111 00010001 01101 ..... ..... ..... @vvv
vpackev_w 0111 00010001 01110 ..... ..... ..... @vvv
vpackev_d 0111 00010001 01111 ..... ..... ..... @vvv
vpackod_b 0111 00010001 10000 ..... ..... ..... @vvv
vpackod_h 0111 00010001 10001 ..... ..... ..... @vvv
vpackod_w 0111 00010001 10010 ..... ..... ..... @vvv
vpackod_d 0111 00010001 10011 ..... ..... ..... @vvv
# VPICK{EV/OD}.{B/H/W/D}
vpickev_b 0111 00010001 11100 ..... ..... ..... @vvv
vpickev_h 0111 00010001 11101 ..... ..... ..... @vvv
vpickev_w 0111 00010001 11110 ..... ..... ..... @vvv
vpickev_d 0111 00010001 11111 ..... ..... ..... @vvv
vpickod_b 0111 00010010 00000 ..... ..... ..... @vvv
vpickod_h 0111 00010010 00001 ..... ..... ..... @vvv
vpickod_w 0111 00010010 00010 ..... ..... ..... @vvv
vpickod_d 0111 00010010 00011 ..... ..... ..... @vvv

View file

@ -2766,3 +2766,91 @@ SETALLNEZ(vsetallnez_b, MO_8)
/* vsetallnez helpers for the half-word, word and double-word element sizes. */
SETALLNEZ(vsetallnez_h, MO_16)
SETALLNEZ(vsetallnez_w, MO_32)
SETALLNEZ(vsetallnez_d, MO_64)
/*
 * VPACKEV: interleave the even-indexed elements of Vj and Vk.
 * For each pair p: Vd[2p + 1] = Vj[2p], Vd[2p] = Vk[2p].
 *
 * BIT is the width of one element *pair* (twice the element width), so
 * LSX_LEN / BIT is the number of pairs.  E is the VReg element accessor.
 * The result is built in a scratch register so Vd may alias Vj or Vk.
 */
#define VPACKEV(NAME, BIT, E)                                   \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    VReg packed;                                                \
    int even;                                                   \
                                                                \
    for (even = 0; even < 2 * (LSX_LEN / BIT); even += 2) {     \
        packed.E(even + 1) = Vj->E(even);                       \
        packed.E(even) = Vk->E(even);                           \
    }                                                           \
    *Vd = packed;                                               \
}

VPACKEV(vpackev_b, 16, B)
VPACKEV(vpackev_h, 32, H)
VPACKEV(vpackev_w, 64, W)
VPACKEV(vpackev_d, 128, D)
/*
 * VPACKOD: interleave the odd-indexed elements of Vj and Vk.
 * For each pair p: Vd[2p + 1] = Vj[2p + 1], Vd[2p] = Vk[2p + 1].
 *
 * BIT is the width of one element *pair* (twice the element width), so
 * LSX_LEN / BIT is the number of pairs.  E is the VReg element accessor.
 * The result is built in a scratch register so Vd may alias Vj or Vk.
 */
#define VPACKOD(NAME, BIT, E)                                   \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    VReg packed;                                                \
    int even;                                                   \
                                                                \
    for (even = 0; even < 2 * (LSX_LEN / BIT); even += 2) {     \
        packed.E(even + 1) = Vj->E(even + 1);                   \
        packed.E(even) = Vk->E(even + 1);                       \
    }                                                           \
    *Vd = packed;                                               \
}

VPACKOD(vpackod_b, 16, B)
VPACKOD(vpackod_h, 32, H)
VPACKOD(vpackod_w, 64, W)
VPACKOD(vpackod_d, 128, D)
/*
 * VPICKEV: gather the even-indexed elements of Vk into the low half of Vd
 * and the even-indexed elements of Vj into the high half.
 * For each n: Vd[n + half] = Vj[2n], Vd[n] = Vk[2n].
 *
 * BIT is twice the element width, so LSX_LEN / BIT is half the element
 * count.  E is the VReg element accessor.  The result is built in a
 * scratch register so Vd may alias Vj or Vk.
 */
#define VPICKEV(NAME, BIT, E)                                   \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    VReg picked;                                                \
    const int half = LSX_LEN / BIT;                             \
    int n;                                                      \
                                                                \
    for (n = 0; n < half; n++) {                                \
        const int src = 2 * n;                                  \
                                                                \
        picked.E(n + half) = Vj->E(src);                        \
        picked.E(n) = Vk->E(src);                               \
    }                                                           \
    *Vd = picked;                                               \
}

VPICKEV(vpickev_b, 16, B)
VPICKEV(vpickev_h, 32, H)
VPICKEV(vpickev_w, 64, W)
VPICKEV(vpickev_d, 128, D)
/*
 * VPICKOD: gather the odd-indexed elements of Vk into the low half of Vd
 * and the odd-indexed elements of Vj into the high half.
 * For each n: Vd[n + half] = Vj[2n + 1], Vd[n] = Vk[2n + 1].
 *
 * BIT is twice the element width, so LSX_LEN / BIT is half the element
 * count.  E is the VReg element accessor.  The result is built in a
 * scratch register so Vd may alias Vj or Vk.
 */
#define VPICKOD(NAME, BIT, E)                                   \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    VReg picked;                                                \
    const int half = LSX_LEN / BIT;                             \
    int n;                                                      \
                                                                \
    for (n = 0; n < half; n++) {                                \
        const int src = 2 * n + 1;                              \
                                                                \
        picked.E(n + half) = Vj->E(src);                        \
        picked.E(n) = Vk->E(src);                               \
    }                                                           \
    *Vd = picked;                                               \
}

VPICKOD(vpickod_b, 16, B)
VPICKOD(vpickod_h, 32, H)
VPICKOD(vpickod_w, 64, W)
VPICKOD(vpickod_d, 128, D)