diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index e8b3a12d32..0d846c32c2 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1165,6 +1165,8 @@ static const X86OpEntry opcodes_0F[256] = {
     [0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v),
     [0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v),
 
+    [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock),
+    [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock),
     [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
     [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
     [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None),
@@ -2597,7 +2599,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1a ... 0x1b: /* MPX */
     case 0xa3: /* bt */
     case 0xab: /* bts */
-    case 0xb0 ... 0xb1: /* cmpxchg */
     case 0xb3: /* btr */
     case 0xba ... 0xbb: /* grp8, btc */
     case 0xc7: /* grp9 */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 3b92d04c0f..11faa70b5e 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1586,6 +1586,57 @@ static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
     }
 }
 
+static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    TCGv cmpv = tcg_temp_new();
+    TCGv oldv = tcg_temp_new();
+    TCGv newv = tcg_temp_new();
+    TCGv dest;
+
+    tcg_gen_ext_tl(cmpv, cpu_regs[R_EAX], ot);
+    tcg_gen_ext_tl(newv, s->T1, ot);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
+                                  s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_ext_tl(oldv, s->T0, ot);
+        if (decode->op[0].has_ea) {
+            /*
+             * Perform an unconditional store cycle like a physical CPU;
+             * it must happen before the accumulator is changed, to ensure
+             * idempotency if the store faults and the instruction
+             * is restarted.
+             */
+            tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+            gen_op_st_v(s, ot, newv, s->A0);
+        } else {
+            /*
+             * Unlike the memory case, where "the destination operand receives
+             * a write cycle without regard to the result of the comparison",
+             * rm must not be touched at all if the write fails, including
+             * not zero-extending it on 64-bit processors.  So, precompute
+             * the result of a successful writeback and perform the movcond
+             * directly on cpu_regs.  In case rm is part of RAX, note that this
+             * movcond and the one below are mutually exclusive: only one writes.
+             */
+            dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, newv, newv);
+            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
+        }
+        decode->op[0].unit = X86_OP_SKIP;
+    }
+
+    /* Write RAX only if the cmpxchg fails. */
+    dest = gen_op_deposit_reg_v(s, ot, R_EAX, s->T0, oldv);
+    tcg_gen_movcond_tl(TCG_COND_NE, dest, oldv, cmpv, s->T0, dest);
+
+    tcg_gen_mov_tl(s->cc_srcT, cmpv);
+    tcg_gen_sub_tl(cmpv, cmpv, oldv);
+    decode->cc_dst = cmpv;
+    decode->cc_src = oldv;
+    decode->cc_op = CC_OP_SUBB + ot;
+}
+
 static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 5d9312bb48..ad1819815a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -434,13 +434,6 @@ static inline MemOp mo_stacksize(DisasContext *s)
     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
 }
 
-/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
-   byte vs word opcodes.  */
-static inline MemOp mo_b_d(int b, MemOp ot)
-{
-    return b & 1 ? ot : MO_8;
-}
-
 /* Compute the result of writing t0 to the OT-sized register REG.
  *
  * If DEST is NULL, store the result into the register and return the
@@ -715,11 +708,6 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
     return dst;
 }
 
-static void gen_extu(MemOp ot, TCGv reg)
-{
-    gen_ext_tl(reg, reg, ot, false);
-}
-
 static void gen_exts(MemOp ot, TCGv reg)
 {
     gen_ext_tl(reg, reg, ot, true);
@@ -3003,73 +2991,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
 
     /* now check op code */
     switch (b) {
-        /**************************/
-        /* arith & logic */
-    case 0x1b0:
-    case 0x1b1: /* cmpxchg Ev, Gv */
-        {
-            TCGv oldv, newv, cmpv, dest;
-
-            ot = mo_b_d(b, dflag);
-            modrm = x86_ldub_code(env, s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            mod = (modrm >> 6) & 3;
-            oldv = tcg_temp_new();
-            newv = tcg_temp_new();
-            cmpv = tcg_temp_new();
-            gen_op_mov_v_reg(s, ot, newv, reg);
-            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
-            gen_extu(ot, cmpv);
-            if (s->prefix & PREFIX_LOCK) {
-                if (mod == 3) {
-                    goto illegal_op;
-                }
-                gen_lea_modrm(env, s, modrm);
-                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
-                                          s->mem_index, ot | MO_LE);
-            } else {
-                if (mod == 3) {
-                    rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(s, ot, oldv, rm);
-                    gen_extu(ot, oldv);
-
-                    /*
-                     * Unlike the memory case, where "the destination operand receives
-                     * a write cycle without regard to the result of the comparison",
-                     * rm must not be touched altogether if the write fails, including
-                     * not zero-extending it on 64-bit processors.  So, precompute
-                     * the result of a successful writeback and perform the movcond
-                     * directly on cpu_regs.  Also need to write accumulator first, in
-                     * case rm is part of RAX too.
-                     */
-                    dest = gen_op_deposit_reg_v(s, ot, rm, newv, newv);
-                    tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
-                } else {
-                    gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, s->A0);
-
-                    /*
-                     * Perform an unconditional store cycle like physical cpu;
-                     * must be before changing accumulator to ensure
-                     * idempotency if the store faults and the instruction
-                     * is restarted
-                     */
-                    tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
-                    gen_op_st_v(s, ot, newv, s->A0);
-                }
-            }
-            /*
-             * Write EAX only if the cmpxchg fails; reuse newv as the destination,
-             * since it's dead here.
-             */
-            dest = gen_op_deposit_reg_v(s, ot, R_EAX, newv, oldv);
-            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, dest, newv);
-            tcg_gen_mov_tl(cpu_cc_src, oldv);
-            tcg_gen_mov_tl(s->cc_srcT, cmpv);
-            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
-            set_cc_op(s, CC_OP_SUBB + ot);
-        }
-        break;
     case 0x1c7: /* cmpxchg8b */
         modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
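
A note on the movcond pair in gen_CMPXCHG: together they implement, branch-free, the architectural rule that a register-destination CMPXCHG writes either the destination register (equal compare, ZF=1) or the accumulator (unequal compare, ZF=0), never both. As a cross-check, here is a minimal standalone C sketch of those semantics for the 32-bit register case; it is illustrative only, not QEMU code, and the helper name cmpxchg32 and the test values are invented:

#include <stdint.h>
#include <stdio.h>

/* Non-LOCK CMPXCHG r/m32, r32 with a register destination (illustrative).
 * Returns the resulting ZF; the other arithmetic flags come from the
 * comparison *eax - *rm, mirroring CC_OP_SUBB + ot in the patch. */
static int cmpxchg32(uint32_t *eax, uint32_t *rm, uint32_t src)
{
    if (*eax == *rm) {
        *rm = src;   /* success: only the destination register is written */
        return 1;    /* ZF set */
    }
    *eax = *rm;      /* failure: only the accumulator is written */
    return 0;        /* ZF clear */
}

int main(void)
{
    uint32_t eax = 5, ebx = 5;
    int zf = cmpxchg32(&eax, &ebx, 42);
    printf("zf=%d eax=%u ebx=%u\n", zf, eax, ebx); /* zf=1 eax=5 ebx=42 */
    return 0;
}

The memory-destination case differs: the store cycle always happens (re-storing the old value on failure), which is why gen_CMPXCHG performs the movcond and store before touching the accumulator.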