target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT to new decoder

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2024-05-09 15:11:41 +02:00
parent 6476902740
commit 11ffaf8c73
4 changed files with 133 additions and 76 deletions

View file

@ -450,6 +450,50 @@ static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
*entry = *decode_by_prefix(s, opcodes_0F7F);
}
/*
 * Opcode 0F B8: only the F3-prefixed form (POPCNT) is provided here.
 *
 * With PREFIX_REPZ set, *entry becomes the POPCNT entry (which carries its
 * own cpuid(POPCNT) requirement).  Otherwise *entry is cleared to all
 * zeroes -- presumably the caller treats a blank entry as an undefined
 * opcode; confirm against the decoder's dispatch code.
 */
static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry popcnt =
        X86_OP_ENTRYwr(POPCNT, G,v, E,v, cpuid(POPCNT) zextT0);

    if (!(s->prefix & PREFIX_REPZ)) {
        /* No F3 prefix: hand back a blank entry. */
        memset(entry, 0, sizeof(*entry));
        return;
    }

    *entry = popcnt;
}
/*
 * Opcode 0F BC: BSF, or TZCNT when prefixed with F3 on a CPU that
 * advertises BMI1.
 *
 * The table is indexed by mandatory prefix (none, 0x66, 0xf3, 0xf2; see
 * the per-entry comments).  When BMI1 is absent the prefix is ignored
 * and the opcode always decodes as BSF.
 */
static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* For BSF, pass 2op as the third operand so that we can use zextT0 */
    static const X86OpEntry opcodes_0FBC[4] = {
        X86_OP_ENTRY3(BSF,     G,v, E,v, 2op,v, zextT0),
        X86_OP_ENTRY3(BSF,     G,v, E,v, 2op,v, zextT0), /* 0x66 */
        X86_OP_ENTRYwr(TZCNT,  G,v, E,v,        zextT0), /* 0xf3 */
        X86_OP_ENTRY3(BSF,     G,v, E,v, 2op,v, zextT0), /* 0xf2 */
    };

    /*
     * TZCNT belongs to the BMI1 extension (CPUID.7.0:EBX), not to ABM;
     * ABM only covers LZCNT.  Testing the ABM bit here would enable or
     * disable TZCNT based on the wrong feature flag.
     */
    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
        *entry = opcodes_0FBC[0];
    } else {
        *entry = *decode_by_prefix(s, opcodes_0FBC);
    }
}
/*
 * Opcode 0F BD: BSR, or LZCNT when prefixed with F3 on a CPU that
 * advertises ABM.
 *
 * The table is indexed by mandatory prefix (none, 0x66, 0xf3, 0xf2; see
 * the per-entry comments).  When ABM is absent the prefix is ignored
 * and the opcode always decodes as BSR.
 */
static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* For BSR, pass 2op as the third operand so that we can use zextT0 */
    static const X86OpEntry opcodes_0FBD[4] = {
        X86_OP_ENTRY3(BSR,     G,v, E,v, 2op,v, zextT0),
        X86_OP_ENTRY3(BSR,     G,v, E,v, 2op,v, zextT0), /* 0x66 */
        X86_OP_ENTRYwr(LZCNT,  G,v, E,v,        zextT0), /* 0xf3 */
        X86_OP_ENTRY3(BSR,     G,v, E,v, 2op,v, zextT0), /* 0xf2 */
    };

    /*
     * LZCNT is enumerated by the ABM/LZCNT bit (CPUID.80000001H:ECX),
     * not by BMI1; BMI1 only covers TZCNT.  Testing the BMI1 bit here
     * would enable or disable LZCNT based on the wrong feature flag.
     */
    if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
        *entry = opcodes_0FBD[0];
    } else {
        *entry = *decode_by_prefix(s, opcodes_0FBD);
    }
}
static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry movq[4] = {
@ -1255,8 +1299,11 @@ static const X86OpEntry opcodes_0F[256] = {
*/
[0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
[0xb8] = X86_OP_GROUP0(0FB8),
/* decoded as modrm, which is visible as a difference between page fault and #UD */
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
[0xbc] = X86_OP_GROUP0(0FBC),
[0xbd] = X86_OP_GROUP0(0FBD),
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
@ -2158,6 +2205,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
case X86_FEAT_PCLMULQDQ:
return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
case X86_FEAT_POPCNT:
return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
case X86_FEAT_SSE:
return (s->cpuid_features & CPUID_SSE);
case X86_FEAT_SSE2:
@ -2548,8 +2597,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
case 0xab: /* bts */
case 0xb0 ... 0xb1: /* cmpxchg */
case 0xb3: /* btr */
case 0xb8: /* integer ops */
case 0xba ... 0xbd: /* integer ops */
case 0xba ... 0xbb: /* grp8, btc */
case 0xc0 ... 0xc1: /* xadd */
case 0xc7: /* grp9 */
disas_insn_old(s, cpu, b + 0x100);

View file

@ -120,6 +120,7 @@ typedef enum X86CPUIDFeature {
X86_FEAT_FXSR,
X86_FEAT_MOVBE,
X86_FEAT_PCLMULQDQ,
X86_FEAT_POPCNT,
X86_FEAT_SHA_NI,
X86_FEAT_SSE,
X86_FEAT_SSE2,

View file

@ -1333,6 +1333,47 @@ static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
}
}
/*
 * BSF (bit scan forward).
 *
 * Non-standard convention: on entry T0 is the zero-extended input and
 * T1 is the output (destination) value.  The result is left in T0.
 */
static void gen_BSF(DisasContext *s, X86DecodedInsn *decode)
{
    /*
     * Only the Z bit is defined and it is related to the input, so take
     * a copy of T0 for the flags before T0 is overwritten below.
     */
    decode->cc_op = CC_OP_LOGICB + decode->op[0].ot;
    decode->cc_dst = tcg_temp_new();
    tcg_gen_mov_tl(decode->cc_dst, s->T0);

    /*
     * The manual says that the output is undefined when the input is
     * zero, but real hardware leaves it unchanged, and real programs
     * appear to depend on that.  Passing T1 (the output) as the
     * "value on zero" argument of ctz achieves exactly this.
     */
    tcg_gen_ctz_tl(s->T0, s->T0, s->T1);
}
/*
 * BSR (bit scan reverse).
 *
 * Non-standard convention: on entry T0 is the zero-extended input and
 * T1 is the output (destination) value.  The result is left in T0.
 */
static void gen_BSR(DisasContext *s, X86DecodedInsn *decode)
{
    /*
     * Only the Z bit is defined and it is related to the input, so take
     * a copy of T0 for the flags before T0 is overwritten below.
     */
    decode->cc_op = CC_OP_LOGICB + decode->op[0].ot;
    decode->cc_dst = tcg_temp_new();
    tcg_gen_mov_tl(decode->cc_dst, s->T0);

    /*
     * The manual says that the output is undefined when the input is
     * zero, but real hardware leaves it unchanged, and real programs
     * appear to depend on that; hence the output (T1) is passed as the
     * value to return upon zero.
     *
     * BSR wants the bit index of the first 1 bit rather than a count of
     * leading zeros, so the clz result is XORed with
     * TARGET_LONG_BITS - 1.  T1 is XORed with the same constant
     * beforehand so that, in the zero-input case, the final XOR turns
     * it back into the original output value.
     */
    tcg_gen_xori_tl(s->T1, s->T1, TARGET_LONG_BITS - 1);
    tcg_gen_clz_tl(s->T0, s->T0, s->T1);
    tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
}
static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
@ -2134,6 +2175,24 @@ static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
gen_lxx_seg(s, decode, R_SS);
}
/*
 * LZCNT: count leading zero bits of the operand in T0, leaving the
 * count (relative to the operand size) in T0.
 */
static void gen_LZCNT(DisasContext *s, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    int opsize = 8 << ot;   /* operand width in bits */

    /*
     * C bit (cc_src) is defined related to the input, so cc_src gets a
     * copy taken before T0 is overwritten.  cc_dst refers to T0 itself
     * rather than a copy.
     * NOTE(review): presumably so the flags see the result computed
     * below -- confirm against the decoder's flag writeback.
     */
    decode->cc_op = CC_OP_BMILGB + ot;
    decode->cc_dst = s->T0;
    decode->cc_src = tcg_temp_new();
    tcg_gen_mov_tl(decode->cc_src, s->T0);

    /*
     * clz works on the full target_ulong (and yields TARGET_LONG_BITS
     * for a zero input); reduce the result by the number of zeros that
     * we expect to find at the top for narrower operands.
     */
    tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
    tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - opsize);
}
static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
@ -2692,6 +2751,15 @@ static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
gen_popa(s);
}
/*
 * POPCNT: replace T0 with its population count.
 *
 * The original operand is copied into cc_src for CC_OP_POPCNT before
 * T0 is overwritten.
 */
static void gen_POPCNT(DisasContext *s, X86DecodedInsn *decode)
{
    decode->cc_op = CC_OP_POPCNT;

    /* Preserve the input for the flags computation. */
    decode->cc_src = tcg_temp_new();
    tcg_gen_mov_tl(decode->cc_src, s->T0);

    tcg_gen_ctpop_tl(s->T0, s->T0);
}
static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot;
@ -3773,6 +3841,20 @@ static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_EOB_RECHECK_TF;
}
/*
 * TZCNT: count trailing zero bits of the operand in T0, leaving the
 * count in T0.
 */
static void gen_TZCNT(DisasContext *s, X86DecodedInsn *decode)
{
    MemOp ot = decode->op[0].ot;
    int opsize = 8 << ot;   /* operand width in bits */

    /*
     * C bit (cc_src) is defined related to the input, so cc_src gets a
     * copy taken before T0 is overwritten.  cc_dst refers to T0 itself
     * rather than a copy.
     * NOTE(review): presumably so the flags see the result computed
     * below -- confirm against the decoder's flag writeback.
     */
    decode->cc_op = CC_OP_BMILGB + ot;
    decode->cc_dst = s->T0;
    decode->cc_src = tcg_temp_new();
    tcg_gen_mov_tl(decode->cc_src, s->T0);

    /* A zero input returns the operand size. */
    tcg_gen_ctzi_tl(s->T0, s->T0, opsize);
}
static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
{
gen_illegal_opcode(s);

View file

@ -823,11 +823,6 @@ static void gen_movs(DisasContext *s, MemOp ot)
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
/* Emit a TCG move that copies the current value of s->T0 into cpu_cc_dst. */
static void gen_op_update1_cc(DisasContext *s)
{
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
static void gen_op_update2_cc(DisasContext *s)
{
tcg_gen_mov_tl(cpu_cc_src, s->T1);
@ -3311,56 +3306,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
}
break;
case 0x1bc: /* bsf / tzcnt */
case 0x1bd: /* bsr / lzcnt */
ot = dflag;
modrm = x86_ldub_code(env, s);
reg = ((modrm >> 3) & 7) | REX_R(s);
gen_ld_modrm(env, s, modrm, ot);
gen_extu(ot, s->T0);
/* Note that lzcnt and tzcnt are in different extensions. */
if ((prefixes & PREFIX_REPZ)
&& (b & 1
? s->cpuid_ext3_features & CPUID_EXT3_ABM
: s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
int size = 8 << ot;
/* For lzcnt/tzcnt, C bit is defined related to the input. */
tcg_gen_mov_tl(cpu_cc_src, s->T0);
if (b & 1) {
/* For lzcnt, reduce the target_ulong result by the
number of zeros that we expect to find at the top. */
tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
} else {
/* For tzcnt, a zero input must return the operand size. */
tcg_gen_ctzi_tl(s->T0, s->T0, size);
}
/* For lzcnt/tzcnt, Z bit is defined related to the result. */
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_BMILGB + ot);
} else {
/* For bsr/bsf, only the Z bit is defined and it is related
to the input and not the result. */
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
set_cc_op(s, CC_OP_LOGICB + ot);
/* ??? The manual says that the output is undefined when the
input is zero, but real hardware leaves it unchanged, and
real programs appear to depend on that. Accomplish this
by passing the output as the value to return upon zero. */
if (b & 1) {
/* For bsr, return the bit index of the first 1 bit,
not the count of leading zeros. */
tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
tcg_gen_clz_tl(s->T0, s->T0, s->T1);
tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
} else {
tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
}
}
gen_op_mov_reg_v(s, ot, reg, s->T0);
break;
case 0x100:
modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
@ -3955,25 +3900,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
gen_nop_modrm(env, s, modrm);
break;
case 0x1b8: /* SSE4.2 popcnt */
if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
PREFIX_REPZ)
goto illegal_op;
if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
goto illegal_op;
modrm = x86_ldub_code(env, s);
reg = ((modrm >> 3) & 7) | REX_R(s);
ot = dflag;
gen_ld_modrm(env, s, modrm, ot);
gen_extu(ot, s->T0);
tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_ctpop_tl(s->T0, s->T0);
gen_op_mov_reg_v(s, ot, reg, s->T0);
set_cc_op(s, CC_OP_POPCNT);
break;
default:
g_assert_not_reached();
}