mirror of
https://gitlab.com/qemu-project/qemu
synced 2024-11-05 20:35:44 +00:00
target/arm: Expand vector registers for SVE
Change vfp.regs as a uint64_t to vfp.zregs as an ARMVectorReg. The previous patches have made the change in representation relatively painless. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Message-id: 20180123035349.24538-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
4cbca7d9b4
commit
c39c2b9043
4 changed files with 81 additions and 28 deletions
|
@ -153,6 +153,42 @@ typedef struct {
|
|||
uint32_t base_mask;
|
||||
} TCR;
|
||||
|
||||
/* Define a maximum sized vector register.
|
||||
* For 32-bit, this is a 128-bit NEON/AdvSIMD register.
|
||||
* For 64-bit, this is a 2048-bit SVE register.
|
||||
*
|
||||
* Note that the mapping between S, D, and Q views of the register bank
|
||||
* differs between AArch64 and AArch32.
|
||||
* In AArch32:
|
||||
* Qn = regs[n].d[1]:regs[n].d[0]
|
||||
* Dn = regs[n / 2].d[n & 1]
|
||||
* Sn = regs[n / 4].d[n % 4 / 2],
|
||||
* bits 31..0 for even n, and bits 63..32 for odd n
|
||||
* (and regs[16] to regs[31] are inaccessible)
|
||||
* In AArch64:
|
||||
* Zn = regs[n].d[*]
|
||||
* Qn = regs[n].d[1]:regs[n].d[0]
|
||||
* Dn = regs[n].d[0]
|
||||
* Sn = regs[n].d[0] bits 31..0
|
||||
*
|
||||
* This corresponds to the architecturally defined mapping between
|
||||
* the two execution states, and means we do not need to explicitly
|
||||
* map these registers when changing states.
|
||||
*
|
||||
* Align the data for use with TCG host vector operations.
|
||||
*/
|
||||
|
||||
#ifdef TARGET_AARCH64
|
||||
# define ARM_MAX_VQ 16
|
||||
#else
|
||||
# define ARM_MAX_VQ 1
|
||||
#endif
|
||||
|
||||
typedef struct ARMVectorReg {
|
||||
uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
|
||||
} ARMVectorReg;
|
||||
|
||||
|
||||
typedef struct CPUARMState {
|
||||
/* Regs for current mode. */
|
||||
uint32_t regs[16];
|
||||
|
@ -477,22 +513,7 @@ typedef struct CPUARMState {
|
|||
|
||||
/* VFP coprocessor state. */
|
||||
struct {
|
||||
/* VFP/Neon register state. Note that the mapping between S, D and Q
|
||||
* views of the register bank differs between AArch64 and AArch32:
|
||||
* In AArch32:
|
||||
* Qn = regs[2n+1]:regs[2n]
|
||||
* Dn = regs[n]
|
||||
* Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n
|
||||
* (and regs[32] to regs[63] are inaccessible)
|
||||
* In AArch64:
|
||||
* Qn = regs[2n+1]:regs[2n]
|
||||
* Dn = regs[2n]
|
||||
* Sn = regs[2n] bits 31..0
|
||||
* This corresponds to the architecturally defined mapping between
|
||||
* the two execution states, and means we do not need to explicitly
|
||||
* map these registers when changing states.
|
||||
*/
|
||||
uint64_t regs[64] QEMU_ALIGNED(16);
|
||||
ARMVectorReg zregs[32];
|
||||
|
||||
uint32_t xregs[16];
|
||||
/* We store these fpcsr fields separately for convenience. */
|
||||
|
@ -2799,7 +2820,7 @@ static inline void *arm_get_el_change_hook_opaque(ARMCPU *cpu)
|
|||
*/
|
||||
static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
|
||||
{
|
||||
return &env->vfp.regs[regno];
|
||||
return &env->vfp.zregs[regno >> 1].d[regno & 1];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2808,7 +2829,7 @@ static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
|
|||
*/
|
||||
static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
|
||||
{
|
||||
return &env->vfp.regs[2 * regno];
|
||||
return &env->vfp.zregs[regno].d[0];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2817,7 +2838,7 @@ static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
|
|||
*/
|
||||
static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
|
||||
{
|
||||
return &env->vfp.regs[2 * regno];
|
||||
return &env->vfp.zregs[regno].d[0];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -50,7 +50,40 @@ static const VMStateDescription vmstate_vfp = {
|
|||
.minimum_version_id = 3,
|
||||
.needed = vfp_needed,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64),
|
||||
/* For compatibility, store Qn out of Zn here. */
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[2].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[3].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[4].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[5].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[6].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[7].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[8].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[9].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[10].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[11].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[12].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[13].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[14].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[15].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[16].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[17].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[18].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[19].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[20].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[21].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[22].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[23].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[24].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[25].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[26].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[27].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[28].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[29].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[30].d, ARMCPU, 0, 2),
|
||||
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[31].d, ARMCPU, 0, 2),
|
||||
|
||||
/* The xregs array is a little awkward because element 1 (FPSCR)
|
||||
* requires a specific accessor, so we have to split it up in
|
||||
* the vmstate:
|
||||
|
|
|
@ -525,8 +525,8 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
|
|||
{
|
||||
int offs = 0;
|
||||
#ifdef HOST_WORDS_BIGENDIAN
|
||||
/* This is complicated slightly because vfp.regs[2n] is
|
||||
* still the low half and vfp.regs[2n+1] the high half
|
||||
/* This is complicated slightly because vfp.zregs[n].d[0] is
|
||||
* still the low half and vfp.zregs[n].d[1] the high half
|
||||
* of the 128 bit vector, even on big endian systems.
|
||||
* Calculate the offset assuming a fully bigendian 128 bits,
|
||||
* then XOR to account for the order of the two 64 bit halves.
|
||||
|
@ -536,7 +536,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
|
|||
#else
|
||||
offs += element * (1 << size);
|
||||
#endif
|
||||
offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
|
||||
offs += offsetof(CPUARMState, vfp.zregs[regno]);
|
||||
assert_fp_access_checked(s);
|
||||
return offs;
|
||||
}
|
||||
|
@ -545,7 +545,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
|
|||
static inline int vec_full_reg_offset(DisasContext *s, int regno)
|
||||
{
|
||||
assert_fp_access_checked(s);
|
||||
return offsetof(CPUARMState, vfp.regs[regno * 2]);
|
||||
return offsetof(CPUARMState, vfp.zregs[regno]);
|
||||
}
|
||||
|
||||
/* Return a newly allocated pointer to the vector register. */
|
||||
|
|
|
@ -1512,13 +1512,12 @@ static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
|
|||
}
|
||||
}
|
||||
|
||||
static inline long
|
||||
vfp_reg_offset (int dp, int reg)
|
||||
static inline long vfp_reg_offset(bool dp, unsigned reg)
|
||||
{
|
||||
if (dp) {
|
||||
return offsetof(CPUARMState, vfp.regs[reg]);
|
||||
return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
|
||||
} else {
|
||||
long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]);
|
||||
long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
|
||||
if (reg & 1) {
|
||||
ofs += offsetof(CPU_DoubleU, l.upper);
|
||||
} else {
|
||||
|
|
Loading…
Reference in a new issue