Merge commit 4bb2416d42eb from llvm-project (by Jessica Clarke):

[builtins][AArch64] Implement _sync out-of-line atomics

  Whilst Clang does not use these, recent GCC does, and so on systems such
  as FreeBSD that wish to use compiler-rt as the system runtime library
  but also wish to support building programs with GCC, these interfaces are
  needed.

  This is a light adaptation of the code committed to GCC by Sebastian Pop
  <spop@amazon.com>, relicensed with permission for use in compiler-rt.

  Fixes https://github.com/llvm/llvm-project/issues/63483

  Reviewed By: sebpop, MaskRay

  Differential Revision: https://reviews.llvm.org/D158536

Reviewed by:	dim
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D41716
This commit is contained in:
Jessica Clarke 2023-09-04 09:33:27 +01:00
parent c190fb35f3
commit 8524dc53fd
2 changed files with 38 additions and 4 deletions

View file

@ -7,7 +7,7 @@
// Out-of-line LSE atomics helpers. Ported from libgcc library. // Out-of-line LSE atomics helpers. Ported from libgcc library.
// N = {1, 2, 4, 8} // N = {1, 2, 4, 8}
// M = {1, 2, 4, 8, 16} // M = {1, 2, 4, 8, 16}
// ORDER = {'relax', 'acq', 'rel', 'acq_rel'} // ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'}
// Routines implemented: // Routines implemented:
// //
// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) // iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
@ -35,8 +35,8 @@ HIDDEN(___aarch64_have_lse_atomics)
#endif #endif
// Generate mnemonics for // Generate mnemonics for
// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4 // L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4 // L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4,5
#if SIZE == 1 #if SIZE == 1
#define S b #define S b
@ -64,24 +64,44 @@ HIDDEN(___aarch64_have_lse_atomics)
#define L #define L
#define M 0x000000 #define M 0x000000
#define N 0x000000 #define N 0x000000
#define BARRIER
#elif MODEL == 2 #elif MODEL == 2
#define SUFF _acq #define SUFF _acq
#define A a #define A a
#define L #define L
#define M 0x400000 #define M 0x400000
#define N 0x800000 #define N 0x800000
#define BARRIER
#elif MODEL == 3 #elif MODEL == 3
#define SUFF _rel #define SUFF _rel
#define A #define A
#define L l #define L l
#define M 0x008000 #define M 0x008000
#define N 0x400000 #define N 0x400000
#define BARRIER
#elif MODEL == 4 #elif MODEL == 4
#define SUFF _acq_rel #define SUFF _acq_rel
#define A a #define A a
#define L l #define L l
#define M 0x408000 #define M 0x408000
#define N 0xc00000 #define N 0xc00000
#define BARRIER
#elif MODEL == 5
#define SUFF _sync
#ifdef L_swp
// swp has _acq semantics.
#define A a
#define L
#define M 0x400000
#define N 0x800000
#else
// All other _sync functions have _seq semantics.
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#endif
#define BARRIER dmb ish
#else #else
#error #error
#endif // MODEL #endif // MODEL
@ -96,7 +116,12 @@ HIDDEN(___aarch64_have_lse_atomics)
#endif #endif
#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF) #define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
#if MODEL == 5
// Drop A for _sync functions.
#define LDXR GLUE3(ld, xr, S)
#else
#define LDXR GLUE4(ld, A, xr, S) #define LDXR GLUE4(ld, A, xr, S)
#endif
#define STXR GLUE4(st, L, xr, S) #define STXR GLUE4(st, L, xr, S)
// Define temporary registers. // Define temporary registers.
@ -136,9 +161,15 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
STXR w(tmp1), s(1), [x2] STXR w(tmp1), s(1), [x2]
cbnz w(tmp1), 0b cbnz w(tmp1), 0b
1: 1:
BARRIER
ret ret
#else #else
#if MODEL == 5
// Drop A for _sync functions.
#define LDXP GLUE2(ld, xp)
#else
#define LDXP GLUE3(ld, A, xp) #define LDXP GLUE3(ld, A, xp)
#endif
#define STXP GLUE3(st, L, xp) #define STXP GLUE3(st, L, xp)
#ifdef HAS_ASM_LSE #ifdef HAS_ASM_LSE
#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4] #define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
@ -159,6 +190,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
STXP w(tmp2), x2, x3, [x4] STXP w(tmp2), x2, x3, [x4]
cbnz w(tmp2), 0b cbnz w(tmp2), 0b
1: 1:
BARRIER
ret ret
#endif #endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas)) END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
@ -180,6 +212,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
LDXR s(0), [x1] LDXR s(0), [x1]
STXR w(tmp1), s(tmp0), [x1] STXR w(tmp1), s(tmp0), [x1]
cbnz w(tmp1), 0b cbnz w(tmp1), 0b
BARRIER
ret ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp)) END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp #endif // L_swp
@ -224,6 +257,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
OP s(tmp1), s(0), s(tmp0) OP s(tmp1), s(0), s(tmp0)
STXR w(tmp2), s(tmp1), [x1] STXR w(tmp2), s(tmp1), [x1]
cbnz w(tmp2), 0b cbnz w(tmp2), 0b
BARRIER
ret ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM)) END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset #endif // L_ldadd L_ldclr L_ldeor L_ldset

View file

@ -22,7 +22,7 @@ MK_WERROR.gcc= no
.if ${MACHINE_CPUARCH} == "aarch64" .if ${MACHINE_CPUARCH} == "aarch64"
. for pat in cas swp ldadd ldclr ldeor ldset . for pat in cas swp ldadd ldclr ldeor ldset
. for size in 1 2 4 8 16 . for size in 1 2 4 8 16
. for model in 1 2 3 4 . for model in 1 2 3 4 5
. if ${pat} == "cas" || ${size} != "16" . if ${pat} == "cas" || ${size} != "16"
# Use .for to define lse_name, to get a special loop-local variable # Use .for to define lse_name, to get a special loop-local variable
. for lse_name in outline_atomic_${pat}${size}_${model}.S . for lse_name in outline_atomic_${pat}${size}_${model}.S