avx_sig: Prepare to add arm64 neon test

Move the inline asm code to a separate source file and rename the
x86-specific xmm names to more general simd names.

Reviewed by:		kib
Differential Revision:	https://reviews.freebsd.org/D40312
This commit is contained in:
Dmitry Chagin 2023-05-30 11:18:57 +03:00
parent 7a292504ba
commit 68348f41c9
3 changed files with 103 additions and 49 deletions

View file

@ -0,0 +1,14 @@
# Build description for the avx_sig test program.
.include <src.opts.mk>
PROG= avx_sig
SRCS= avx_sig.c
# MAN is left empty: no manual page is listed for this program.
MAN=
# The program is linked with the pthread library.
LIBADD= pthread
# The assembly source is added only when building for amd64.
# NOTE(review): presumably this is the file that defines cpu_to_simd() and
# simd_to_cpu(); no other architecture supplies them here, so confirm the
# program is only built where an implementation exists.
.if ${MACHINE_CPUARCH} == "amd64"
SRCS+= c2x2c_amd64.S
.endif
.include <bsd.prog.mk>

View file

@ -48,33 +48,18 @@
#define nitems(x) (sizeof((x)) / sizeof((x)[0]))
#endif
struct xmmreg {
uint8_t xmm_bytes[16];
#define SIMDRNAM "xmm"
struct simdreg {
uint8_t simd_bytes[16];
};
struct xmm {
struct xmmreg xmmreg[16];
struct simd {
struct simdreg simdreg[16];
};
#define X2C(r) asm("movdqu %0, %%xmm" #r : "=m" (xmm->xmmreg[r]))
#define C2X(r) asm("movdqu %%xmm" #r ", %0" : : "m" (xmm->xmmreg[r]) : "xmm" #r)
static void
cpu_to_xmm(struct xmm *xmm)
{
C2X(0); C2X(1); C2X(2); C2X(3); C2X(4); C2X(5); C2X(6); C2X(7);
C2X(8); C2X(9); C2X(10); C2X(11); C2X(12); C2X(13); C2X(14); C2X(15);
}
static void
xmm_to_cpu(struct xmm *xmm)
{
X2C(0); X2C(1); X2C(2); X2C(3); X2C(4); X2C(5); X2C(6); X2C(7);
X2C(8); X2C(9); X2C(10); X2C(11); X2C(12); X2C(13); X2C(14); X2C(15);
}
#undef C2X
#undef X2C
void cpu_to_simd(struct simd *simd);
void simd_to_cpu(struct simd *simd);
static atomic_uint sigs;
@ -96,23 +81,23 @@ sigalrm_handler(int sig __unused)
alarm(TIMO);
}
static struct xmm zero_xmm = {};
static struct simd zero_simd = {};
static void
fill_xmm(struct xmm *xmm)
fill_simd(struct simd *simd)
{
arc4random_buf(xmm, sizeof(*xmm));
arc4random_buf(simd, sizeof(*simd));
}
static void
dump_xmm(const struct xmmreg *r)
dump_simd(const struct simdreg *r)
{
unsigned k;
for (k = 0; k < nitems(r->xmm_bytes); k++) {
for (k = 0; k < nitems(r->simd_bytes); k++) {
if (k != 0)
printf(" ");
printf("%02x", r->xmm_bytes[k]);
printf("%02x", r->simd_bytes[k]);
}
printf("\n");
}
@ -120,9 +105,9 @@ dump_xmm(const struct xmmreg *r)
static pthread_mutex_t show_lock;
static void
show_diff(const struct xmm *xmm1, const struct xmm *xmm2)
show_diff(const struct simd *simd1, const struct simd *simd2)
{
const struct xmmreg *r1, *r2;
const struct simdreg *r1, *r2;
unsigned i, j;
#if defined(__FreeBSD__)
@ -130,14 +115,14 @@ show_diff(const struct xmm *xmm1, const struct xmm *xmm2)
#elif defined(__linux__)
printf("thr %ld\n", syscall(SYS_gettid));
#endif
for (i = 0; i < nitems(xmm1->xmmreg); i++) {
r1 = &xmm1->xmmreg[i];
r2 = &xmm2->xmmreg[i];
for (j = 0; j < nitems(r1->xmm_bytes); j++) {
if (r1->xmm_bytes[j] != r2->xmm_bytes[j]) {
printf("xmm%u\n", i);
dump_xmm(r1);
dump_xmm(r2);
for (i = 0; i < nitems(simd1->simdreg); i++) {
r1 = &simd1->simdreg[i];
r2 = &simd2->simdreg[i];
for (j = 0; j < nitems(r1->simd_bytes); j++) {
if (r1->simd_bytes[j] != r2->simd_bytes[j]) {
printf("%%%s%u\n", SIMDRNAM, i);
dump_simd(r1);
dump_simd(r2);
break;
}
}
@ -153,26 +138,26 @@ my_pause(void)
static void *
worker_thread(void *arg __unused)
{
struct xmm xmm, xmm_cpu;
struct simd simd, simd_cpu;
fill_xmm(&xmm);
fill_simd(&simd);
for (;;) {
xmm_to_cpu(&xmm);
simd_to_cpu(&simd);
my_pause();
cpu_to_xmm(&xmm_cpu);
if (memcmp(&xmm, &xmm_cpu, sizeof(struct xmm)) != 0) {
cpu_to_simd(&simd_cpu);
if (memcmp(&simd, &simd_cpu, sizeof(struct simd)) != 0) {
pthread_mutex_lock(&show_lock);
show_diff(&xmm, &xmm_cpu);
show_diff(&simd, &simd_cpu);
abort();
pthread_mutex_unlock(&show_lock);
}
xmm_to_cpu(&zero_xmm);
simd_to_cpu(&zero_simd);
my_pause();
cpu_to_xmm(&xmm_cpu);
if (memcmp(&zero_xmm, &xmm_cpu, sizeof(struct xmm)) != 0) {
cpu_to_simd(&simd_cpu);
if (memcmp(&zero_simd, &simd_cpu, sizeof(struct simd)) != 0) {
pthread_mutex_lock(&show_lock);
show_diff(&zero_xmm, &xmm_cpu);
show_diff(&zero_simd, &simd_cpu);
abort();
pthread_mutex_unlock(&show_lock);
}

View file

@ -0,0 +1,55 @@
/*
* This file is in public domain.
* Written by Dmitry Chagin <dchagin@FreeBSD.org>
*
* $FreeBSD$
*/
/*
 * cpu_to_simd: copy the current contents of %xmm0..%xmm15 to memory.
 *
 * The destination address is taken from %rdi, which is where the System V
 * AMD64 calling convention passes the first pointer argument.  Register
 * %xmmN is written to byte offset N * 16 from that address, so the
 * routine writes 16 * 16 = 256 bytes in total.
 *
 * Only memory is written; no register is modified.
 */
.global cpu_to_simd
.type cpu_to_simd, @function
cpu_to_simd:
/* movdqu is the unaligned form, so the buffer needs no 16-byte alignment. */
movdqu %xmm0, (%rdi)
movdqu %xmm1, 1 * 16(%rdi)
movdqu %xmm2, 2 * 16(%rdi)
movdqu %xmm3, 3 * 16(%rdi)
movdqu %xmm4, 4 * 16(%rdi)
movdqu %xmm5, 5 * 16(%rdi)
movdqu %xmm6, 6 * 16(%rdi)
movdqu %xmm7, 7 * 16(%rdi)
movdqu %xmm8, 8 * 16(%rdi)
movdqu %xmm9, 9 * 16(%rdi)
movdqu %xmm10, 10 * 16(%rdi)
movdqu %xmm11, 11 * 16(%rdi)
movdqu %xmm12, 12 * 16(%rdi)
movdqu %xmm13, 13 * 16(%rdi)
movdqu %xmm14, 14 * 16(%rdi)
movdqu %xmm15, 15 * 16(%rdi)
retq
/* Set the symbol size to the distance from the label to this point. */
.size cpu_to_simd, . - cpu_to_simd
/*
 * simd_to_cpu: load %xmm0..%xmm15 from memory.
 *
 * The source address is taken from %rdi, which is where the System V
 * AMD64 calling convention passes the first pointer argument.  Register
 * %xmmN is loaded from byte offset N * 16 from that address, so the
 * routine reads 16 * 16 = 256 bytes in total.
 *
 * All sixteen XMM registers are overwritten; memory is only read.
 */
.global simd_to_cpu
.type simd_to_cpu, @function
simd_to_cpu:
/* movdqu is the unaligned form, so the buffer needs no 16-byte alignment. */
movdqu (%rdi), %xmm0
movdqu 1 * 16(%rdi), %xmm1
movdqu 2 * 16(%rdi), %xmm2
movdqu 3 * 16(%rdi), %xmm3
movdqu 4 * 16(%rdi), %xmm4
movdqu 5 * 16(%rdi), %xmm5
movdqu 6 * 16(%rdi), %xmm6
movdqu 7 * 16(%rdi), %xmm7
movdqu 8 * 16(%rdi), %xmm8
movdqu 9 * 16(%rdi), %xmm9
movdqu 10 * 16(%rdi), %xmm10
movdqu 11 * 16(%rdi), %xmm11
movdqu 12 * 16(%rdi), %xmm12
movdqu 13 * 16(%rdi), %xmm13
movdqu 14 * 16(%rdi), %xmm14
movdqu 15 * 16(%rdi), %xmm15
retq
/* Set the symbol size to the distance from the label to this point. */
.size simd_to_cpu, . - simd_to_cpu
.section .note.GNU-stack,"",@progbits