target-arm queue:

* more A64 Neon instructions
  * fixes to reset CBAR values for A9 and A15 boards
  * fix accesses to PMCR register in -icount mode
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABCAAGBQJTJ3GEAAoJEDwlJe0UNgzevGcP/2ftt3PRACZ9BqUh6s1xBW0/
 /dqcEIildxZYxmTHDH+g5t2ueho+o+qhpmXf+lHO0C+nl86SRm/DVJj+tmuUoWdf
 5BA1eOVjQnvrnmQx72/CS4NI4t0npoYf7Cserkpm9ZOdzweJy68YHZZRVpHLfldS
 Ba7W749EsGPnd5ZEhnplwGSIjM3ZUfixm3yJSsGnHAf6KEskkVKjUUI2lZWecT81
 5f14qN6F7qk7XvH9HGOWZktiKGfaSLVXzZGsmdq6oDVTr+2ZMkoFxn7jMFm4EHtW
 cTDVcwN9Y6tFM2Pm7PIxzXmP9lTc5L+ghVXn9XhuY9OS7ZFD46r/sh3Lkhypq+WP
 SfJaPOG5zZuKkmj+hyO+08hjLxR+TJDIKr26tY62yGrteWN+SkzoJuO6Gn17uuC8
 UhAqjbLuunhSlJA7oy42i7YcR84LXemMCplbqBY/v7W54ZWrxV+QgNKiLtbsIpWF
 tGg8R85jkjE7lV7dfaeK7N+vQjGIMwzT+g9sYyS3zsY0ubFnkIMa04Zn4gMsCheU
 azmyCfQOCmdN71CEEN6rbTWL3AtWw2Oss1RxK1iQu5J8+YgC2TvNsb4hE4K5KctX
 utvoPoVScBWZvvX2zvMv43+qz74arSTOxuBCMW9Gf0pEQA1cT0GdYzRrb3g+8CCp
 n3GuAoTMj2d72c2WO36I
 =YFAg
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140317' into staging

target-arm queue:
 * more A64 Neon instructions
 * fixes to reset CBAR values for A9 and A15 boards
 * fix accesses to PMCR register in -icount mode

# gpg: Signature made Mon 17 Mar 2014 22:04:52 GMT using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20140317: (30 commits)
  scripts/qemu-binfmt-conf.sh: Add AArch64 registration
  target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate)
  target-arm: A64: Implement FCVTXN
  target-arm: A64: Implement scalar saturating narrow ops
  target-arm: A64: Move handle_2misc_narrow function
  target-arm: A64: Implement AdvSIMD reciprocal estimate insns URECPE, FRECPE
  softfloat: export squash_input_denormal functions
  target-arm: A64: Implement FCVTZS, FCVTZU in the shift-imm categories
  target-arm: A64: Handle saturating left shifts SQSHL, SQSHLU, UQSHL
  exec-all.h: Increase MAX_OP_PER_INSTR for ARM A64 decoder
  target-arm: A64: Implement FRINT*
  target-arm: A64: Implement SRI
  target-arm: A64: Add FRECPX (reciprocal exponent)
  target-arm: A64: List unsupported shift-imm opcodes
  target-arm: A64: Implement FCVTL
  target-arm: A64: Implement FCVTN
  target-arm: A64: Implement FCVT[NMAPZ][SU] SIMD instructions
  target-arm: A64: Implement SHLL, SHLL2
  target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP
  target-arm: A64: Saturating and narrowing shift ops
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2014-03-18 14:31:42 +00:00
commit 2dda43bacc
15 changed files with 1861 additions and 269 deletions

View file

@ -288,7 +288,7 @@ INLINE flag extractFloat32Sign( float32 a )
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
{
if (STATUS(flush_inputs_to_zero)) {
if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
@ -473,7 +473,7 @@ INLINE flag extractFloat64Sign( float64 a )
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
{
if (STATUS(flush_inputs_to_zero)) {
if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {

View file

@ -143,11 +143,21 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
unsigned long mem_size;
DeviceState *dev;
SysBusDevice *busdev;
ObjectClass *cpu_oc;
cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, "cortex-a9");
assert(cpu_oc);
for (n = 0; n < EXYNOS4210_NCPUS; n++) {
s->cpu[n] = cpu_arm_init("cortex-a9");
if (!s->cpu[n]) {
fprintf(stderr, "Unable to find CPU %d definition\n", n);
Object *cpuobj = object_new(object_class_get_name(cpu_oc));
Error *err = NULL;
s->cpu[n] = ARM_CPU(cpuobj);
object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR,
"reset-cbar", &error_abort);
object_property_set_bool(cpuobj, true, "realized", &err);
if (err) {
error_report("%s", error_get_pretty(err));
exit(1);
}
}

View file

@ -18,6 +18,7 @@
#include "hw/i2c/i2c.h"
#include "sysemu/blockdev.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#define SMP_BOOT_ADDR 0xe0000000
#define SMP_BOOTREG_ADDR 0x10000030
@ -49,6 +50,7 @@ static void realview_init(QEMUMachineInitArgs *args,
{
ARMCPU *cpu = NULL;
CPUARMState *env;
ObjectClass *cpu_oc;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram_lo = g_new(MemoryRegion, 1);
MemoryRegion *ram_hi = g_new(MemoryRegion, 1);
@ -70,12 +72,14 @@ static void realview_init(QEMUMachineInitArgs *args,
uint32_t sys_id;
ram_addr_t low_ram_size;
ram_addr_t ram_size = args->ram_size;
hwaddr periphbase = 0;
switch (board_type) {
case BOARD_EB:
break;
case BOARD_EB_MPCORE:
is_mpcore = 1;
periphbase = 0x10100000;
break;
case BOARD_PB_A8:
is_pb = 1;
@ -83,16 +87,37 @@ static void realview_init(QEMUMachineInitArgs *args,
case BOARD_PBX_A9:
is_mpcore = 1;
is_pb = 1;
periphbase = 0x1f000000;
break;
}
cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, args->cpu_model);
if (!cpu_oc) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
for (n = 0; n < smp_cpus; n++) {
cpu = cpu_arm_init(args->cpu_model);
if (!cpu) {
fprintf(stderr, "Unable to find CPU definition\n");
Object *cpuobj = object_new(object_class_get_name(cpu_oc));
Error *err = NULL;
if (is_pb && is_mpcore) {
object_property_set_int(cpuobj, periphbase, "reset-cbar", &err);
if (err) {
error_report("%s", error_get_pretty(err));
exit(1);
}
}
object_property_set_bool(cpuobj, true, "realized", &err);
if (err) {
error_report("%s", error_get_pretty(err));
exit(1);
}
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpuobj), ARM_CPU_IRQ);
}
cpu = ARM_CPU(first_cpu);
env = &cpu->env;
if (arm_feature(env, ARM_FEATURE_V7)) {
if (is_mpcore) {
@ -141,16 +166,10 @@ static void realview_init(QEMUMachineInitArgs *args,
sysbus_mmio_map(SYS_BUS_DEVICE(sysctl), 0, 0x10000000);
if (is_mpcore) {
hwaddr periphbase;
dev = qdev_create(NULL, is_pb ? "a9mpcore_priv": "realview_mpcore");
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
qdev_init_nofail(dev);
busdev = SYS_BUS_DEVICE(dev);
if (is_pb) {
periphbase = 0x1f000000;
} else {
periphbase = 0x10100000;
}
sysbus_mmio_map(busdev, 0, periphbase);
for (n = 0; n < smp_cpus; n++) {
sysbus_connect_irq(busdev, n, cpu_irq[n]);

View file

@ -32,6 +32,7 @@
#include "sysemu/blockdev.h"
#include "hw/block/flash.h"
#include "sysemu/device_tree.h"
#include "qemu/error-report.h"
#include <libfdt.h>
#define VEXPRESS_BOARD_ID 0x8e0
@ -173,6 +174,64 @@ struct VEDBoardInfo {
DBoardInitFn *init;
};
static void init_cpus(const char *cpu_model, const char *privdev,
hwaddr periphbase, qemu_irq *pic)
{
ObjectClass *cpu_oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
DeviceState *dev;
SysBusDevice *busdev;
int n;
if (!cpu_oc) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
/* Create the actual CPUs */
for (n = 0; n < smp_cpus; n++) {
Object *cpuobj = object_new(object_class_get_name(cpu_oc));
Error *err = NULL;
object_property_set_int(cpuobj, periphbase, "reset-cbar", &err);
if (err) {
error_report("%s", error_get_pretty(err));
exit(1);
}
object_property_set_bool(cpuobj, true, "realized", &err);
if (err) {
error_report("%s", error_get_pretty(err));
exit(1);
}
}
/* Create the private peripheral devices (including the GIC);
* this must happen after the CPUs are created because a15mpcore_priv
* wires itself up to the CPU's generic_timer gpio out lines.
*/
dev = qdev_create(NULL, privdev);
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
qdev_init_nofail(dev);
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, periphbase);
/* Interrupts [42:0] are from the motherboard;
* [47:43] are reserved; [63:48] are daughterboard
* peripherals. Note that some documentation numbers
* external interrupts starting from 32 (because there
* are internal interrupts 0..31).
*/
for (n = 0; n < 64; n++) {
pic[n] = qdev_get_gpio_in(dev, n);
}
/* Connect the CPUs to the GIC */
for (n = 0; n < smp_cpus; n++) {
DeviceState *cpudev = DEVICE(qemu_get_cpu(n));
sysbus_connect_irq(busdev, n, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
}
}
static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
ram_addr_t ram_size,
const char *cpu_model,
@ -181,25 +240,12 @@ static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
MemoryRegion *lowram = g_new(MemoryRegion, 1);
DeviceState *dev;
SysBusDevice *busdev;
int n;
qemu_irq cpu_irq[4];
ram_addr_t low_ram_size;
if (!cpu_model) {
cpu_model = "cortex-a9";
}
for (n = 0; n < smp_cpus; n++) {
ARMCPU *cpu = cpu_arm_init(cpu_model);
if (!cpu) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
}
if (ram_size > 0x40000000) {
/* 1GB is the maximum the address space permits */
fprintf(stderr, "vexpress-a9: cannot model more than 1GB RAM\n");
@ -221,23 +267,7 @@ static void a9_daughterboard_init(const VEDBoardInfo *daughterboard,
memory_region_add_subregion(sysmem, 0x60000000, ram);
/* 0x1e000000 A9MPCore (SCU) private memory region */
dev = qdev_create(NULL, "a9mpcore_priv");
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
qdev_init_nofail(dev);
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, 0x1e000000);
for (n = 0; n < smp_cpus; n++) {
sysbus_connect_irq(busdev, n, cpu_irq[n]);
}
/* Interrupts [42:0] are from the motherboard;
* [47:43] are reserved; [63:48] are daughterboard
* peripherals. Note that some documentation numbers
* external interrupts starting from 32 (because the
* A9MP has internal interrupts 0..31).
*/
for (n = 0; n < 64; n++) {
pic[n] = qdev_get_gpio_in(dev, n);
}
init_cpus(cpu_model, "a9mpcore_priv", 0x1e000000, pic);
/* Daughterboard peripherals : 0x10020000 .. 0x20000000 */
@ -296,29 +326,14 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
const char *cpu_model,
qemu_irq *pic)
{
int n;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
MemoryRegion *sram = g_new(MemoryRegion, 1);
qemu_irq cpu_irq[4];
DeviceState *dev;
SysBusDevice *busdev;
if (!cpu_model) {
cpu_model = "cortex-a15";
}
for (n = 0; n < smp_cpus; n++) {
ARMCPU *cpu;
cpu = cpu_arm_init(cpu_model);
if (!cpu) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
cpu_irq[n] = qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ);
}
{
/* We have to use a separate 64 bit variable here to avoid the gcc
* "comparison is always false due to limited range of data type"
@ -337,23 +352,7 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
memory_region_add_subregion(sysmem, 0x80000000, ram);
/* 0x2c000000 A15MPCore private memory region (GIC) */
dev = qdev_create(NULL, "a15mpcore_priv");
qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
qdev_init_nofail(dev);
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, 0x2c000000);
for (n = 0; n < smp_cpus; n++) {
sysbus_connect_irq(busdev, n, cpu_irq[n]);
}
/* Interrupts [42:0] are from the motherboard;
* [47:43] are reserved; [63:48] are daughterboard
* peripherals. Note that some documentation numbers
* external interrupts starting from 32 (because there
* are internal interrupts 0..31).
*/
for (n = 0; n < 64; n++) {
pic[n] = qdev_get_gpio_in(dev, n);
}
init_cpus(cpu_model, "a15mpcore_priv", 0x2c000000, pic);
/* A15 daughterboard peripherals: */

View file

@ -390,6 +390,12 @@ static void machvirt_init(QEMUMachineInitArgs *args)
if (n > 0) {
object_property_set_bool(cpuobj, true, "start-powered-off", NULL);
}
if (object_property_find(cpuobj, "reset-cbar", NULL)) {
object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base,
"reset-cbar", &error_abort);
}
object_property_set_bool(cpuobj, true, "realized", NULL);
}
fdt_add_cpu_nodes(vbi);

View file

@ -44,7 +44,7 @@ struct TranslationBlock;
typedef struct TranslationBlock TranslationBlock;
/* XXX: make safe guess about sizes */
#define MAX_OP_PER_INSTR 208
#define MAX_OP_PER_INSTR 266
#if HOST_LONG_BITS == 32
#define MAX_OPC_PARAM_PER_ARG 2

View file

@ -244,6 +244,13 @@ INLINE flag get_default_nan_mode(float_status *status)
*----------------------------------------------------------------------------*/
void float_raise( int8 flags STATUS_PARAM);
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
float32 float32_squash_input_denormal(float32 a STATUS_PARAM);
float64 float64_squash_input_denormal(float64 a STATUS_PARAM);
/*----------------------------------------------------------------------------
| Options to indicate which negations to perform in float*_muladd()
| Using these differs from negating an input or output before calling

View file

@ -41,6 +41,9 @@ if [ $cpu != "arm" ] ; then
echo ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
echo ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register
fi
if [ $cpu != "aarch64" ] ; then
echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-aarch64:' > /proc/sys/fs/binfmt_misc/register
fi
if [ $cpu != "sparc" ] ; then
echo ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
fi

View file

@ -60,6 +60,11 @@ uint32_t HELPER(cls32)(uint32_t x)
return clrsb32(x);
}
uint32_t HELPER(clz32)(uint32_t x)
{
return clz32(x);
}
uint64_t HELPER(rbit64)(uint64_t x)
{
/* assign the correct byte position */
@ -180,6 +185,36 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result;
}
/* Helper function for 64 bit polynomial multiply case:
* perform PolynomialMult(op1, op2) and return either the top or
* bottom half of the 128 bit result.
*/
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
for (bitnum = 0; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 << bitnum;
}
}
return res;
}
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
/* bit 0 of op1 can't influence the high 64 bits at all */
for (bitnum = 1; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 >> (64 - bitnum);
}
}
return res;
}
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
@ -258,3 +293,146 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
}
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
/* Pairwise long add: add pairs of adjacent elements into
* double-width elements in the result (eg _s8 is an 8x8->16 op)
*/
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
uint64_t nsignmask = 0x0080008000800080ULL;
uint64_t wsignmask = 0x8000800080008000ULL;
uint64_t elementmask = 0x00ff00ff00ff00ffULL;
uint64_t tmp1, tmp2;
uint64_t res, signres;
/* Extract odd elements, sign extend each to a 16 bit field */
tmp1 = a & elementmask;
tmp1 ^= nsignmask;
tmp1 |= wsignmask;
tmp1 = (tmp1 - nsignmask) ^ wsignmask;
/* Ditto for the even elements */
tmp2 = (a >> 8) & elementmask;
tmp2 ^= nsignmask;
tmp2 |= wsignmask;
tmp2 = (tmp2 - nsignmask) ^ wsignmask;
/* calculate the result by summing bits 0..14, 16..22, etc,
* and then adjusting the sign bits 15, 23, etc manually.
* This ensures the addition can't overflow the 16 bit field.
*/
signres = (tmp1 ^ tmp2) & wsignmask;
res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
res ^= signres;
return res;
}
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
uint64_t tmp;
tmp = a & 0x00ff00ff00ff00ffULL;
tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
return tmp;
}
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
int32_t reslo, reshi;
reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
uint64_t tmp;
tmp = a & 0x0000ffff0000ffffULL;
tmp += (a >> 16) & 0x0000ffff0000ffffULL;
return tmp;
}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
float_status *fpst = fpstp;
uint32_t val32, sbit;
int32_t exp;
if (float32_is_any_nan(a)) {
float32 nan = a;
if (float32_is_signaling_nan(a)) {
float_raise(float_flag_invalid, fpst);
nan = float32_maybe_silence_nan(a);
}
if (fpst->default_nan_mode) {
nan = float32_default_nan;
}
return nan;
}
val32 = float32_val(a);
sbit = 0x80000000ULL & val32;
exp = extract32(val32, 23, 8);
if (exp == 0) {
return make_float32(sbit | (0xfe << 23));
} else {
return make_float32(sbit | (~exp & 0xff) << 23);
}
}
float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
float_status *fpst = fpstp;
uint64_t val64, sbit;
int64_t exp;
if (float64_is_any_nan(a)) {
float64 nan = a;
if (float64_is_signaling_nan(a)) {
float_raise(float_flag_invalid, fpst);
nan = float64_maybe_silence_nan(a);
}
if (fpst->default_nan_mode) {
nan = float64_default_nan;
}
return nan;
}
val64 = float64_val(a);
sbit = 0x8000000000000000ULL & val64;
exp = extract64(float64_val(a), 52, 11);
if (exp == 0) {
return make_float64(sbit | (0x7feULL << 52));
} else {
return make_float64(sbit | (~exp & 0x7ffULL) << 52);
}
}
float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
/* Von Neumann rounding is implemented by using round-to-zero
* and then setting the LSB of the result if Inexact was raised.
*/
float32 r;
float_status *fpst = &env->vfp.fp_status;
float_status tstat = *fpst;
int exflags;
set_float_rounding_mode(float_round_to_zero, &tstat);
set_float_exception_flags(0, &tstat);
r = float64_to_float32(a, &tstat);
r = float32_maybe_silence_nan(r);
exflags = get_float_exception_flags(&tstat);
if (exflags & float_flag_inexact) {
r = make_float32(float32_val(r) | 1);
}
exflags |= get_float_exception_flags(fpst);
set_float_exception_flags(exflags, fpst);
return r;
}

View file

@ -21,12 +21,15 @@ DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_FLAGS_1(clz32, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
@ -36,3 +39,10 @@ DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)

View file

@ -1983,6 +1983,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
ARMCPRegInfo pmcr = {
.name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
.access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
.type = ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
.accessfn = pmreg_access, .writefn = pmcr_write,
.raw_writefn = raw_write,
@ -4519,16 +4520,21 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
* int->float conversions at run-time. */
#define float64_256 make_float64(0x4070000000000000LL)
#define float64_512 make_float64(0x4080000000000000LL)
#define float32_maxnorm make_float32(0x7f7fffff)
#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
/* The algorithm that must be used to calculate the estimate
* is specified by the ARM ARM.
/* Reciprocal functions
*
* The algorithm that must be used to calculate the estimate
* is specified by the ARM ARM, see FPRecipEstimate()
*/
static float64 recip_estimate(float64 a, CPUARMState *env)
static float64 recip_estimate(float64 a, float_status *real_fp_status)
{
/* These calculations mustn't set any fp exception flags,
* so we use a local copy of the fp_status.
*/
float_status dummy_status = env->vfp.standard_fp_status;
float_status dummy_status = *real_fp_status;
float_status *s = &dummy_status;
/* q = (int)(a * 512.0) */
float64 q = float64_mul(float64_512, a, s);
@ -4549,56 +4555,178 @@ static float64 recip_estimate(float64 a, CPUARMState *env)
return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
float32 HELPER(recpe_f32)(float32 a, CPUARMState *env)
/* Common wrapper to call recip_estimate */
static float64 call_recip_estimate(float64 num, int off, float_status *fpst)
{
float_status *s = &env->vfp.standard_fp_status;
float64 f64;
uint32_t val32 = float32_val(a);
uint64_t val64 = float64_val(num);
uint64_t frac = extract64(val64, 0, 52);
int64_t exp = extract64(val64, 52, 11);
uint64_t sbit;
float64 scaled, estimate;
int result_exp;
int a_exp = (val32 & 0x7f800000) >> 23;
int sign = val32 & 0x80000000;
if (float32_is_any_nan(a)) {
if (float32_is_signaling_nan(a)) {
float_raise(float_flag_invalid, s);
/* Generate the scaled number for the estimate function */
if (exp == 0) {
if (extract64(frac, 51, 1) == 0) {
exp = -1;
frac = extract64(frac, 0, 50) << 2;
} else {
frac = extract64(frac, 0, 51) << 1;
}
return float32_default_nan;
} else if (float32_is_infinity(a)) {
return float32_set_sign(float32_zero, float32_is_neg(a));
} else if (float32_is_zero_or_denormal(a)) {
if (!float32_is_zero(a)) {
float_raise(float_flag_input_denormal, s);
}
float_raise(float_flag_divbyzero, s);
return float32_set_sign(float32_infinity, float32_is_neg(a));
} else if (a_exp >= 253) {
float_raise(float_flag_underflow, s);
return float32_set_sign(float32_zero, float32_is_neg(a));
}
f64 = make_float64((0x3feULL << 52)
| ((int64_t)(val32 & 0x7fffff) << 29));
/* scaled = '0' : '01111111110' : fraction<51:44> : Zeros(44); */
scaled = make_float64((0x3feULL << 52)
| extract64(frac, 44, 8) << 44);
result_exp = 253 - a_exp;
estimate = recip_estimate(scaled, fpst);
f64 = recip_estimate(f64, env);
/* Build new result */
val64 = float64_val(estimate);
sbit = 0x8000000000000000ULL & val64;
exp = off - exp;
frac = extract64(val64, 0, 52);
val32 = sign
| ((result_exp & 0xff) << 23)
| ((float64_val(f64) >> 29) & 0x7fffff);
return make_float32(val32);
if (exp == 0) {
frac = 1ULL << 51 | extract64(frac, 1, 51);
} else if (exp == -1) {
frac = 1ULL << 50 | extract64(frac, 2, 50);
exp = 0;
}
return make_float64(sbit | (exp << 52) | frac);
}
static bool round_to_inf(float_status *fpst, bool sign_bit)
{
switch (fpst->float_rounding_mode) {
case float_round_nearest_even: /* Round to Nearest */
return true;
case float_round_up: /* Round to +Inf */
return !sign_bit;
case float_round_down: /* Round to -Inf */
return sign_bit;
case float_round_to_zero: /* Round to Zero */
return false;
}
g_assert_not_reached();
}
float32 HELPER(recpe_f32)(float32 input, void *fpstp)
{
float_status *fpst = fpstp;
float32 f32 = float32_squash_input_denormal(input, fpst);
uint32_t f32_val = float32_val(f32);
uint32_t f32_sbit = 0x80000000ULL & f32_val;
int32_t f32_exp = extract32(f32_val, 23, 8);
uint32_t f32_frac = extract32(f32_val, 0, 23);
float64 f64, r64;
uint64_t r64_val;
int64_t r64_exp;
uint64_t r64_frac;
if (float32_is_any_nan(f32)) {
float32 nan = f32;
if (float32_is_signaling_nan(f32)) {
float_raise(float_flag_invalid, fpst);
nan = float32_maybe_silence_nan(f32);
}
if (fpst->default_nan_mode) {
nan = float32_default_nan;
}
return nan;
} else if (float32_is_infinity(f32)) {
return float32_set_sign(float32_zero, float32_is_neg(f32));
} else if (float32_is_zero(f32)) {
float_raise(float_flag_divbyzero, fpst);
return float32_set_sign(float32_infinity, float32_is_neg(f32));
} else if ((f32_val & ~(1ULL << 31)) < (1ULL << 21)) {
/* Abs(value) < 2.0^-128 */
float_raise(float_flag_overflow | float_flag_inexact, fpst);
if (round_to_inf(fpst, f32_sbit)) {
return float32_set_sign(float32_infinity, float32_is_neg(f32));
} else {
return float32_set_sign(float32_maxnorm, float32_is_neg(f32));
}
} else if (f32_exp >= 253 && fpst->flush_to_zero) {
float_raise(float_flag_underflow, fpst);
return float32_set_sign(float32_zero, float32_is_neg(f32));
}
f64 = make_float64(((int64_t)(f32_exp) << 52) | (int64_t)(f32_frac) << 29);
r64 = call_recip_estimate(f64, 253, fpst);
r64_val = float64_val(r64);
r64_exp = extract64(r64_val, 52, 11);
r64_frac = extract64(r64_val, 0, 52);
/* result = sign : result_exp<7:0> : fraction<51:29>; */
return make_float32(f32_sbit |
(r64_exp & 0xff) << 23 |
extract64(r64_frac, 29, 24));
}
float64 HELPER(recpe_f64)(float64 input, void *fpstp)
{
float_status *fpst = fpstp;
float64 f64 = float64_squash_input_denormal(input, fpst);
uint64_t f64_val = float64_val(f64);
uint64_t f64_sbit = 0x8000000000000000ULL & f64_val;
int64_t f64_exp = extract64(f64_val, 52, 11);
float64 r64;
uint64_t r64_val;
int64_t r64_exp;
uint64_t r64_frac;
/* Deal with any special cases */
if (float64_is_any_nan(f64)) {
float64 nan = f64;
if (float64_is_signaling_nan(f64)) {
float_raise(float_flag_invalid, fpst);
nan = float64_maybe_silence_nan(f64);
}
if (fpst->default_nan_mode) {
nan = float64_default_nan;
}
return nan;
} else if (float64_is_infinity(f64)) {
return float64_set_sign(float64_zero, float64_is_neg(f64));
} else if (float64_is_zero(f64)) {
float_raise(float_flag_divbyzero, fpst);
return float64_set_sign(float64_infinity, float64_is_neg(f64));
} else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
/* Abs(value) < 2.0^-1024 */
float_raise(float_flag_overflow | float_flag_inexact, fpst);
if (round_to_inf(fpst, f64_sbit)) {
return float64_set_sign(float64_infinity, float64_is_neg(f64));
} else {
return float64_set_sign(float64_maxnorm, float64_is_neg(f64));
}
} else if (f64_exp >= 1023 && fpst->flush_to_zero) {
float_raise(float_flag_underflow, fpst);
return float64_set_sign(float64_zero, float64_is_neg(f64));
}
r64 = call_recip_estimate(f64, 2045, fpst);
r64_val = float64_val(r64);
r64_exp = extract64(r64_val, 52, 11);
r64_frac = extract64(r64_val, 0, 52);
/* result = sign : result_exp<10:0> : fraction<51:0> */
return make_float64(f64_sbit |
((r64_exp & 0x7ff) << 52) |
r64_frac);
}
/* The algorithm that must be used to calculate the estimate
* is specified by the ARM ARM.
*/
static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
{
/* These calculations mustn't set any fp exception flags,
* so we use a local copy of the fp_status.
*/
float_status dummy_status = env->vfp.standard_fp_status;
float_status dummy_status = *real_fp_status;
float_status *s = &dummy_status;
float64 q;
int64_t q_int;
@ -4645,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
{
float_status *s = &env->vfp.standard_fp_status;
float_status *s = fpstp;
float32 f32 = float32_squash_input_denormal(input, s);
uint32_t val = float32_val(f32);
uint32_t f32_sbit = 0x80000000 & val;
int32_t f32_exp = extract32(val, 23, 8);
uint32_t f32_frac = extract32(val, 0, 23);
uint64_t f64_frac;
uint64_t val64;
int result_exp;
float64 f64;
uint32_t val;
uint64_t val64;
val = float32_val(a);
if (float32_is_any_nan(a)) {
if (float32_is_signaling_nan(a)) {
if (float32_is_any_nan(f32)) {
float32 nan = f32;
if (float32_is_signaling_nan(f32)) {
float_raise(float_flag_invalid, s);
nan = float32_maybe_silence_nan(f32);
}
return float32_default_nan;
} else if (float32_is_zero_or_denormal(a)) {
if (!float32_is_zero(a)) {
float_raise(float_flag_input_denormal, s);
if (s->default_nan_mode) {
nan = float32_default_nan;
}
return nan;
} else if (float32_is_zero(f32)) {
float_raise(float_flag_divbyzero, s);
return float32_set_sign(float32_infinity, float32_is_neg(a));
} else if (float32_is_neg(a)) {
return float32_set_sign(float32_infinity, float32_is_neg(f32));
} else if (float32_is_neg(f32)) {
float_raise(float_flag_invalid, s);
return float32_default_nan;
} else if (float32_is_infinity(a)) {
} else if (float32_is_infinity(f32)) {
return float32_zero;
}
/* Normalize to a double-precision value between 0.25 and 1.0,
/* Scale and normalize to a double-precision value between 0.25 and 1.0,
* preserving the parity of the exponent. */
if ((val & 0x800000) == 0) {
f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
| (0x3feULL << 52)
| ((uint64_t)(val & 0x7fffff) << 29));
} else {
f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
| (0x3fdULL << 52)
| ((uint64_t)(val & 0x7fffff) << 29));
f64_frac = ((uint64_t) f32_frac) << 29;
if (f32_exp == 0) {
while (extract64(f64_frac, 51, 1) == 0) {
f64_frac = f64_frac << 1;
f32_exp = f32_exp-1;
}
f64_frac = extract64(f64_frac, 0, 51) << 1;
}
result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
if (extract64(f32_exp, 0, 1) == 0) {
f64 = make_float64(((uint64_t) f32_sbit) << 32
| (0x3feULL << 52)
| f64_frac);
} else {
f64 = make_float64(((uint64_t) f32_sbit) << 32
| (0x3fdULL << 52)
| f64_frac);
}
f64 = recip_sqrt_estimate(f64, env);
result_exp = (380 - f32_exp) / 2;
f64 = recip_sqrt_estimate(f64, s);
val64 = float64_val(f64);
@ -4696,8 +4839,72 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
return make_float32(val);
}
uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env)
float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
{
float_status *s = fpstp;
float64 f64 = float64_squash_input_denormal(input, s);
uint64_t val = float64_val(f64);
uint64_t f64_sbit = 0x8000000000000000ULL & val;
int64_t f64_exp = extract64(val, 52, 11);
uint64_t f64_frac = extract64(val, 0, 52);
int64_t result_exp;
uint64_t result_frac;
if (float64_is_any_nan(f64)) {
float64 nan = f64;
if (float64_is_signaling_nan(f64)) {
float_raise(float_flag_invalid, s);
nan = float64_maybe_silence_nan(f64);
}
if (s->default_nan_mode) {
nan = float64_default_nan;
}
return nan;
} else if (float64_is_zero(f64)) {
float_raise(float_flag_divbyzero, s);
return float64_set_sign(float64_infinity, float64_is_neg(f64));
} else if (float64_is_neg(f64)) {
float_raise(float_flag_invalid, s);
return float64_default_nan;
} else if (float64_is_infinity(f64)) {
return float64_zero;
}
/* Scale and normalize to a double-precision value between 0.25 and 1.0,
* preserving the parity of the exponent. */
if (f64_exp == 0) {
while (extract64(f64_frac, 51, 1) == 0) {
f64_frac = f64_frac << 1;
f64_exp = f64_exp - 1;
}
f64_frac = extract64(f64_frac, 0, 51) << 1;
}
if (extract64(f64_exp, 0, 1) == 0) {
f64 = make_float64(f64_sbit
| (0x3feULL << 52)
| f64_frac);
} else {
f64 = make_float64(f64_sbit
| (0x3fdULL << 52)
| f64_frac);
}
result_exp = (3068 - f64_exp) / 2;
f64 = recip_sqrt_estimate(f64, s);
result_frac = extract64(float64_val(f64), 0, 52);
return make_float64(f64_sbit |
((result_exp & 0x7ff) << 52) |
result_frac);
}
uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
{
float_status *s = fpstp;
float64 f64;
if ((a & 0x80000000) == 0) {
@ -4707,13 +4914,14 @@ uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env)
f64 = make_float64((0x3feULL << 52)
| ((int64_t)(a & 0x7fffffff) << 21));
f64 = recip_estimate (f64, env);
f64 = recip_estimate(f64, s);
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
{
float_status *fpst = fpstp;
float64 f64;
if ((a & 0xc0000000) == 0) {
@ -4728,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
| ((uint64_t)(a & 0x3fffffff) << 22));
}
f64 = recip_sqrt_estimate(f64, env);
f64 = recip_sqrt_estimate(f64, fpst);
return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}

View file

@ -167,10 +167,12 @@ DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
DEF_HELPER_2(recpe_f32, f32, f32, env)
DEF_HELPER_2(rsqrte_f32, f32, f32, env)
DEF_HELPER_2(recpe_u32, i32, i32, env)
DEF_HELPER_2(rsqrte_u32, i32, i32, env)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_2(recpe_u32, i32, i32, ptr)
DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
DEF_HELPER_3(shl_cc, i32, env, i32, i32)

File diff suppressed because it is too large Load diff

View file

@ -6682,17 +6682,33 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
break;
}
case NEON_2RM_VRECPE:
gen_helper_recpe_u32(tmp, tmp, cpu_env);
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
gen_helper_recpe_u32(tmp, tmp, fpstatus);
tcg_temp_free_ptr(fpstatus);
break;
}
case NEON_2RM_VRSQRTE:
gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
tcg_temp_free_ptr(fpstatus);
break;
}
case NEON_2RM_VRECPE_F:
gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
tcg_temp_free_ptr(fpstatus);
break;
}
case NEON_2RM_VRSQRTE_F:
gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
tcg_temp_free_ptr(fpstatus);
break;
}
case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
gen_vfp_sito(0, 1);
break;
@ -10654,6 +10670,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
dc->cp_regs = cpu->cp_regs;
dc->current_pl = arm_current_pl(env);
dc->features = env->features;
cpu_F0s = tcg_temp_new_i32();
cpu_F1s = tcg_temp_new_i32();

View file

@ -26,6 +26,7 @@ typedef struct DisasContext {
int aarch64;
int current_pl;
GHashTable *cp_regs;
uint64_t features; /* CPU features bits */
#define TMP_A64_MAX 16
int tmp_a64_count;
TCGv_i64 tmp_a64[TMP_A64_MAX];
@ -33,6 +34,11 @@ typedef struct DisasContext {
extern TCGv_ptr cpu_env;
static inline int arm_dc_feature(DisasContext *dc, int feature)
{
return (dc->features & (1ULL << feature)) != 0;
}
/* target-specific extra values for is_jmp */
/* These instructions trap after executing, so the A32/T32 decoder must
* defer them until after the conditional execution state has been updated.