Locking changes for v6.10:

- Over a dozen code generation micro-optimizations for the atomic
    and spinlock code.
 
  - Add more __ro_after_init attributes
 
  - Robustify the lockdevent_*() macros
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmZBrMMRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1gSuA//YyLRTCGtH6d/fCudlzzoa14MHO/QiCv7
 lgmq3Vqif/m+MW7LwQJbLrxDPJPT1mE9Ol9woOc133Cj1QZhF/HQvDAKT9ZpMoXU
 d8U3kuZ7tN41TJuQx6vNSCv3w5ToKeXaQJGxiT6od2Y/0QlhUKhVBSBQVtyc/ma6
 o1Uhq1Qp5KPj928jiqwI0JCZJFqqLvzq/rIT38V05phHEPet4GbLMbz9ZTsw70pm
 xmLzGLXJQ9maziuVcmRUrctsAkbk+VhChQ9p4HrH6AcYPwyQoF+zJr7iocyzIMG2
 xQqhEYShI72lcRft8hZwlrLTKZJWSAkDIxIxaQ2egzsNBwBPbRpP0mUIz3qbwJxQ
 fqzKGxwDmxjiX1Ib4gIVje66hp2QpPX5G1ARoeKvbrHkXxzqVuFlaQBn1+OAQ/GV
 mNzKADxrjalhyiMksHXbEbUNEvXCGqC2N9AOWT6XNvpLDqTJBz/wB+f9cbx3gYEO
 9rXwVicWXLzUnEfbRaEjCrDeMEHMLqhaZIndgCx07JpFkkTtKLD1N9tBxFPNH+SP
 XK7SAsXrxwhBjGbWItfF4eOaPCey+/+kGhOPadfTg3g9zDjEBvX/YNBBw9q2CUWc
 JWd/gct+/Jnnkh1jdIj9yRF2xciVY+iOshHRzG+clo/PhRTwv+DwfMJ/uzn+oaSF
 vOT+exKA8bg=
 =rT48
 -----END PGP SIGNATURE-----

Merge tag 'locking-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:

 - Over a dozen code generation micro-optimizations for the atomic
   and spinlock code

 - Add more __ro_after_init attributes

 - Robustify the lockevent_*() macros

* tag 'locking-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/pvqspinlock/x86: Use _Q_LOCKED_VAL in PV_UNLOCK_ASM macro
  locking/qspinlock/x86: Micro-optimize virt_spin_lock()
  locking/atomic/x86: Merge __arch{,_try}_cmpxchg64_emu_local() with __arch{,_try}_cmpxchg64_emu()
  locking/atomic/x86: Introduce arch_try_cmpxchg64_local()
  locking/pvqspinlock/x86: Remove redundant CMP after CMPXCHG in __raw_callee_save___pv_queued_spin_unlock()
  locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  locking/qspinlock: Use atomic_try_cmpxchg_relaxed() in xchg_tail()
  locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions
  locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions
  locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() to x86_32
  locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32
  locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64
  locking/atomic/x86: Modernize x86_32 arch_{,try_}_cmpxchg64{,_local}()
  locking/atomic/x86: Correct the definition of __arch_try_cmpxchg128()
  x86/tsc: Make __use_tsc __ro_after_init
  x86/kvm: Make kvm_async_pf_enabled __ro_after_init
  context_tracking: Make context_tracking_key __ro_after_init
  jump_label,module: Don't alloc static_key_mod for __ro_after_init keys
  locking/qspinlock: Always evaluate lockevent* non-event parameter once
This commit is contained in:
Linus Torvalds 2024-05-13 17:01:28 -07:00
commit 48fc82c40b
17 changed files with 297 additions and 177 deletions

View file

@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
} }
#define arch_atomic_add_return arch_atomic_add_return #define arch_atomic_add_return arch_atomic_add_return
static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) #define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
{
return arch_atomic_add_return(-i, v);
}
#define arch_atomic_sub_return arch_atomic_sub_return
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{ {
@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
} }
#define arch_atomic_fetch_add arch_atomic_fetch_add #define arch_atomic_fetch_add arch_atomic_fetch_add
static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) #define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
{
return xadd(&v->counter, -i);
}
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{ {

View file

@ -14,6 +14,32 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) } #define ATOMIC64_INIT(val) { (val) }
/*
* Read an atomic64_t non-atomically.
*
* This is intended to be used in cases where a subsequent atomic operation
* will handle the torn value, and can be used to prime the first iteration
* of unconditional try_cmpxchg() loops, e.g.:
*
* s64 val = arch_atomic64_read_nonatomic(v);
* do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
*
* This is NOT safe to use where the value is not always checked by a
* subsequent atomic operation, such as in conditional try_cmpxchg() loops
* that can break before the atomic operation, e.g.:
*
* s64 val = arch_atomic64_read_nonatomic(v);
* do {
* if (condition(val))
* break;
* } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
*/
static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
{
/* See comment in arch_atomic_read(). */
return __READ_ONCE(v->counter);
}
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...) #define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT #ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL #define ATOMIC64_DECL_ONE __ATOMIC64_DECL
@ -61,12 +87,18 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL #undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT #undef ATOMIC64_EXPORT
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{ {
return arch_cmpxchg64(&v->counter, o, n); return arch_cmpxchg64(&v->counter, old, new);
} }
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
return arch_try_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{ {
s64 o; s64 o;
@ -195,69 +227,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
c = old;
} }
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
c = old;
return old; return val;
} }
#define arch_atomic64_fetch_and arch_atomic64_fetch_and #define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
c = old;
} }
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
c = old;
return old; return val;
} }
#define arch_atomic64_fetch_or arch_atomic64_fetch_or #define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
c = old;
} }
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
c = old;
return old; return val;
} }
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{ {
s64 old, c = 0; s64 val = arch_atomic64_read_nonatomic(v);
while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
c = old;
return old; return val;
} }
#define arch_atomic64_fetch_add arch_atomic64_fetch_add #define arch_atomic64_fetch_add arch_atomic64_fetch_add

View file

@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
} }
#define arch_atomic64_add_return arch_atomic64_add_return #define arch_atomic64_add_return arch_atomic64_add_return
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) #define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
{
return arch_atomic64_add_return(-i, v);
}
#define arch_atomic64_sub_return arch_atomic64_sub_return
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{ {
@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
} }
#define arch_atomic64_fetch_add arch_atomic64_fetch_add #define arch_atomic64_fetch_add arch_atomic64_fetch_add
static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) #define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
{
return xadd(&v->counter, -i);
}
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{ {

View file

@ -3,103 +3,150 @@
#define _ASM_X86_CMPXCHG_32_H #define _ASM_X86_CMPXCHG_32_H
/* /*
* Note: if you use set64_bit(), __cmpxchg64(), or their variants, * Note: if you use __cmpxchg64(), or their variants,
* you need to test for the feature in boot_cpu_data. * you need to test for the feature in boot_cpu_data.
*/ */
union __u64_halves {
u64 full;
struct {
u32 low, high;
};
};
#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \
({ \
union __u64_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
asm volatile(_lock "cmpxchg8b %[ptr]" \
: [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
\
o.full; \
})
static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX);
}
static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
{
return __arch_cmpxchg64(ptr, old, new,);
}
#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \
({ \
union __u64_halves o = { .full = *(_oldp), }, \
n = { .full = (_new), }; \
bool ret; \
\
asm volatile(_lock "cmpxchg8b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
\
if (unlikely(!ret)) \
*(_oldp) = o.full; \
\
likely(ret); \
})
static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
}
static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
return __arch_try_cmpxchg64(ptr, oldp, new,);
}
#ifdef CONFIG_X86_CMPXCHG64 #ifdef CONFIG_X86_CMPXCHG64
#define arch_cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#define arch_try_cmpxchg64(ptr, po, n) \
__try_cmpxchg64((ptr), (unsigned long long *)(po), \
(unsigned long long)(n))
#endif
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) #define arch_cmpxchg64 __cmpxchg64
{
u64 prev;
asm volatile(LOCK_PREFIX "cmpxchg8b %1"
: "=A" (prev),
"+m" (*ptr)
: "b" ((u32)new),
"c" ((u32)(new >> 32)),
"0" (old)
: "memory");
return prev;
}
static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) #define arch_cmpxchg64_local __cmpxchg64_local
{
u64 prev;
asm volatile("cmpxchg8b %1"
: "=A" (prev),
"+m" (*ptr)
: "b" ((u32)new),
"c" ((u32)(new >> 32)),
"0" (old)
: "memory");
return prev;
}
static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new) #define arch_try_cmpxchg64 __try_cmpxchg64
{
bool success;
u64 old = *pold;
asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
CC_SET(z)
: CC_OUT(z) (success),
[ptr] "+m" (*ptr),
"+A" (old)
: "b" ((u32)new),
"c" ((u32)(new >> 32))
: "memory");
if (unlikely(!success)) #define arch_try_cmpxchg64_local __try_cmpxchg64_local
*pold = old;
return success; #else
}
#ifndef CONFIG_X86_CMPXCHG64
/* /*
* Building a kernel capable of running on 80386 and 80486. It may be necessary * Building a kernel capable of running on 80386 and 80486. It may be necessary
* to simulate the cmpxchg8b on the 80386 and 80486 CPU. * to simulate the cmpxchg8b on the 80386 and 80486 CPU.
*/ */
#define arch_cmpxchg64(ptr, o, n) \ #define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \
({ \ ({ \
__typeof__(*(ptr)) __ret; \ union __u64_halves o = { .full = (_old), }, \
__typeof__(*(ptr)) __old = (o); \ n = { .full = (_new), }; \
__typeof__(*(ptr)) __new = (n); \ \
alternative_io(LOCK_PREFIX_HERE \ asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \ "call cmpxchg8b_emu", \
"lock; cmpxchg8b (%%esi)" , \ _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
X86_FEATURE_CX8, \ : [ptr] "+m" (*(_ptr)), \
"=A" (__ret), \ "+a" (o.low), "+d" (o.high) \
"S" ((ptr)), "0" (__old), \ : "b" (n.low), "c" (n.high), "S" (_ptr) \
"b" ((unsigned int)__new), \ : "memory"); \
"c" ((unsigned int)(__new>>32)) \ \
: "memory"); \ o.full; \
__ret; }) })
static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; ");
}
#define arch_cmpxchg64 arch_cmpxchg64
#define arch_cmpxchg64_local(ptr, o, n) \ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
({ \ {
__typeof__(*(ptr)) __ret; \ return __arch_cmpxchg64_emu(ptr, old, new, ,);
__typeof__(*(ptr)) __old = (o); \ }
__typeof__(*(ptr)) __new = (n); \ #define arch_cmpxchg64_local arch_cmpxchg64_local
alternative_io("call cmpxchg8b_emu", \
"cmpxchg8b (%%esi)" , \ #define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \
X86_FEATURE_CX8, \ ({ \
"=A" (__ret), \ union __u64_halves o = { .full = *(_oldp), }, \
"S" ((ptr)), "0" (__old), \ n = { .full = (_new), }; \
"b" ((unsigned int)__new), \ bool ret; \
"c" ((unsigned int)(__new>>32)) \ \
: "memory"); \ asm volatile(ALTERNATIVE(_lock_loc \
__ret; }) "call cmpxchg8b_emu", \
_lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
CC_SET(e) \
: CC_OUT(e) (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high), "S" (_ptr) \
: "memory"); \
\
if (unlikely(!ret)) \
*(_oldp) = o.full; \
\
likely(ret); \
})
static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; ");
}
#define arch_try_cmpxchg64 arch_try_cmpxchg64
static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,);
}
#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
#endif #endif

View file

@ -20,6 +20,12 @@
arch_try_cmpxchg((ptr), (po), (n)); \ arch_try_cmpxchg((ptr), (po), (n)); \
}) })
#define arch_try_cmpxchg64_local(ptr, po, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
arch_try_cmpxchg_local((ptr), (po), (n)); \
})
union __u128_halves { union __u128_halves {
u128 full; u128 full;
struct { struct {
@ -62,7 +68,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
asm volatile(_lock "cmpxchg16b %[ptr]" \ asm volatile(_lock "cmpxchg16b %[ptr]" \
CC_SET(e) \ CC_SET(e) \
: CC_OUT(e) (ret), \ : CC_OUT(e) (ret), \
[ptr] "+m" (*ptr), \ [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \ "+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \ : "b" (n.low), "c" (n.high) \
: "memory"); \ : "memory"); \

View file

@ -85,6 +85,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
#define virt_spin_lock virt_spin_lock #define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock) static inline bool virt_spin_lock(struct qspinlock *lock)
{ {
int val;
if (!static_branch_likely(&virt_spin_lock_key)) if (!static_branch_likely(&virt_spin_lock_key))
return false; return false;
@ -94,10 +96,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
* horrible lock 'holder' preemption issues. * horrible lock 'holder' preemption issues.
*/ */
do { __retry:
while (atomic_read(&lock->val) != 0) val = atomic_read(&lock->val);
cpu_relax();
} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
cpu_relax();
goto __retry;
}
return true; return true;
} }

View file

@ -25,9 +25,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
* *
* void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock) * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
* { * {
* u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * u8 lockval = _Q_LOCKED_VAL;
* *
* if (likely(lockval == _Q_LOCKED_VAL)) * if (try_cmpxchg(&lock->locked, &lockval, 0))
* return; * return;
* pv_queued_spin_unlock_slowpath(lock, lockval); * pv_queued_spin_unlock_slowpath(lock, lockval);
* } * }
@ -40,10 +40,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define PV_UNLOCK_ASM \ #define PV_UNLOCK_ASM \
FRAME_BEGIN \ FRAME_BEGIN \
"push %rdx\n\t" \ "push %rdx\n\t" \
"mov $0x1,%eax\n\t" \ "mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \
"xor %edx,%edx\n\t" \ "xor %edx,%edx\n\t" \
LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \ LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
"cmp $0x1,%al\n\t" \
"jne .slowpath\n\t" \ "jne .slowpath\n\t" \
"pop %rdx\n\t" \ "pop %rdx\n\t" \
FRAME_END \ FRAME_END \

View file

@ -44,7 +44,7 @@
#include <asm/svm.h> #include <asm/svm.h>
#include <asm/e820/api.h> #include <asm/e820/api.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); DEFINE_STATIC_KEY_FALSE_RO(kvm_async_pf_enabled);
static int kvmapf = 1; static int kvmapf = 1;

View file

@ -44,7 +44,7 @@ EXPORT_SYMBOL(tsc_khz);
static int __read_mostly tsc_unstable; static int __read_mostly tsc_unstable;
static unsigned int __initdata tsc_early_khz; static unsigned int __initdata tsc_early_khz;
static DEFINE_STATIC_KEY_FALSE(__use_tsc); static DEFINE_STATIC_KEY_FALSE_RO(__use_tsc);
int tsc_clocksource_reliable; int tsc_clocksource_reliable;

View file

@ -180,6 +180,11 @@ static inline bool is_kernel_rodata(unsigned long addr)
addr < (unsigned long)__end_rodata; addr < (unsigned long)__end_rodata;
} }
static inline bool is_kernel_ro_after_init(unsigned long addr)
{
return addr >= (unsigned long)__start_ro_after_init &&
addr < (unsigned long)__end_ro_after_init;
}
/** /**
* is_kernel_inittext - checks if the pointer address is located in the * is_kernel_inittext - checks if the pointer address is located in the
* .init.text section * .init.text section

View file

@ -216,6 +216,7 @@ extern struct jump_entry __start___jump_table[];
extern struct jump_entry __stop___jump_table[]; extern struct jump_entry __stop___jump_table[];
extern void jump_label_init(void); extern void jump_label_init(void);
extern void jump_label_init_ro(void);
extern void jump_label_lock(void); extern void jump_label_lock(void);
extern void jump_label_unlock(void); extern void jump_label_unlock(void);
extern void arch_jump_label_transform(struct jump_entry *entry, extern void arch_jump_label_transform(struct jump_entry *entry,
@ -265,6 +266,8 @@ static __always_inline void jump_label_init(void)
static_key_initialized = true; static_key_initialized = true;
} }
static __always_inline void jump_label_init_ro(void) { }
static __always_inline bool static_key_false(struct static_key *key) static __always_inline bool static_key_false(struct static_key *key)
{ {
if (unlikely_notrace(static_key_count(key) > 0)) if (unlikely_notrace(static_key_count(key) > 0))

View file

@ -1415,6 +1415,7 @@ static void mark_readonly(void)
* insecure pages which are W+X. * insecure pages which are W+X.
*/ */
flush_module_init_free_work(); flush_module_init_free_work();
jump_label_init_ro();
mark_rodata_ro(); mark_rodata_ro();
debug_checkwx(); debug_checkwx();
rodata_test(); rodata_test();

View file

@ -432,7 +432,7 @@ static __always_inline void ct_kernel_enter(bool user, int offset) { }
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h> #include <trace/events/context_tracking.h>
DEFINE_STATIC_KEY_FALSE(context_tracking_key); DEFINE_STATIC_KEY_FALSE_RO(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key); EXPORT_SYMBOL_GPL(context_tracking_key);
static noinstr bool context_tracking_recursion_enter(void) static noinstr bool context_tracking_recursion_enter(void)

View file

@ -530,6 +530,45 @@ void __init jump_label_init(void)
cpus_read_unlock(); cpus_read_unlock();
} }
static inline bool static_key_sealed(struct static_key *key)
{
return (key->type & JUMP_TYPE_LINKED) && !(key->type & ~JUMP_TYPE_MASK);
}
static inline void static_key_seal(struct static_key *key)
{
unsigned long type = key->type & JUMP_TYPE_TRUE;
key->type = JUMP_TYPE_LINKED | type;
}
void jump_label_init_ro(void)
{
struct jump_entry *iter_start = __start___jump_table;
struct jump_entry *iter_stop = __stop___jump_table;
struct jump_entry *iter;
if (WARN_ON_ONCE(!static_key_initialized))
return;
cpus_read_lock();
jump_label_lock();
for (iter = iter_start; iter < iter_stop; iter++) {
struct static_key *iterk = jump_entry_key(iter);
if (!is_kernel_ro_after_init((unsigned long)iterk))
continue;
if (static_key_sealed(iterk))
continue;
static_key_seal(iterk);
}
jump_label_unlock();
cpus_read_unlock();
}
#ifdef CONFIG_MODULES #ifdef CONFIG_MODULES
enum jump_label_type jump_label_init_type(struct jump_entry *entry) enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@ -650,6 +689,15 @@ static int jump_label_add_module(struct module *mod)
static_key_set_entries(key, iter); static_key_set_entries(key, iter);
continue; continue;
} }
/*
* If the key was sealed at init, then there's no need to keep a
* reference to its module entries - just patch them now and be
* done with it.
*/
if (static_key_sealed(key))
goto do_poke;
jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
if (!jlm) if (!jlm)
return -ENOMEM; return -ENOMEM;
@ -675,6 +723,7 @@ static int jump_label_add_module(struct module *mod)
static_key_set_linked(key); static_key_set_linked(key);
/* Only update if we've changed from our initial state */ /* Only update if we've changed from our initial state */
do_poke:
if (jump_label_type(iter) != jump_label_init_type(iter)) if (jump_label_type(iter) != jump_label_init_type(iter))
__jump_label_update(key, iter, iter_stop, true); __jump_label_update(key, iter, iter_stop, true);
} }
@ -699,6 +748,10 @@ static void jump_label_del_module(struct module *mod)
if (within_module((unsigned long)key, mod)) if (within_module((unsigned long)key, mod))
continue; continue;
/* No @jlm allocated because key was sealed at init. */
if (static_key_sealed(key))
continue;
/* No memory during module load */ /* No memory during module load */
if (WARN_ON(!static_key_linked(key))) if (WARN_ON(!static_key_linked(key)))
continue; continue;

View file

@ -53,8 +53,8 @@ static inline void __lockevent_add(enum lock_events event, int inc)
#else /* CONFIG_LOCK_EVENT_COUNTS */ #else /* CONFIG_LOCK_EVENT_COUNTS */
#define lockevent_inc(ev) #define lockevent_inc(ev)
#define lockevent_add(ev, c) #define lockevent_add(ev, c) do { (void)(c); } while (0)
#define lockevent_cond_inc(ev, c) #define lockevent_cond_inc(ev, c) do { (void)(c); } while (0)
#endif /* CONFIG_LOCK_EVENT_COUNTS */ #endif /* CONFIG_LOCK_EVENT_COUNTS */

View file

@ -220,21 +220,18 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
*/ */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{ {
u32 old, new, val = atomic_read(&lock->val); u32 old, new;
for (;;) { old = atomic_read(&lock->val);
new = (val & _Q_LOCKED_PENDING_MASK) | tail; do {
new = (old & _Q_LOCKED_PENDING_MASK) | tail;
/* /*
* We can use relaxed semantics since the caller ensures that * We can use relaxed semantics since the caller ensures that
* the MCS node is properly initialized before updating the * the MCS node is properly initialized before updating the
* tail. * tail.
*/ */
old = atomic_cmpxchg_relaxed(&lock->val, val, new); } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
if (old == val)
break;
val = old;
}
return old; return old;
} }
#endif /* _Q_PENDING_BITS == 8 */ #endif /* _Q_PENDING_BITS == 8 */

View file

@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
*/ */
for (;;) { for (;;) {
int val = atomic_read(&lock->val); int val = atomic_read(&lock->val);
u8 old = 0;
if (!(val & _Q_LOCKED_PENDING_MASK) && if (!(val & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
lockevent_inc(pv_lock_stealing); lockevent_inc(pv_lock_stealing);
return true; return true;
} }
@ -116,11 +117,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it. * lock just to be sure that it will get it.
*/ */
static __always_inline int trylock_clear_pending(struct qspinlock *lock) static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{ {
u16 old = _Q_PENDING_VAL;
return !READ_ONCE(lock->locked) && return !READ_ONCE(lock->locked) &&
(cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
_Q_LOCKED_VAL) == _Q_PENDING_VAL);
} }
#else /* _Q_PENDING_BITS == 8 */ #else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock) static __always_inline void set_pending(struct qspinlock *lock)
@ -128,27 +130,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val); atomic_or(_Q_PENDING_VAL, &lock->val);
} }
static __always_inline int trylock_clear_pending(struct qspinlock *lock) static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{ {
int val = atomic_read(&lock->val); int old, new;
for (;;) {
int old, new;
if (val & _Q_LOCKED_MASK)
break;
old = atomic_read(&lock->val);
do {
if (old & _Q_LOCKED_MASK)
return false;
/* /*
* Try to clear pending bit & set locked bit * Try to clear pending bit & set locked bit
*/ */
old = val; new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
val = atomic_cmpxchg_acquire(&lock->val, old, new);
if (val == old) return true;
return 1;
}
return 0;
} }
#endif /* _Q_PENDING_BITS == 8 */ #endif /* _Q_PENDING_BITS == 8 */
@ -216,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
int hopcnt = 0; int hopcnt = 0;
for_each_hash_entry(he, offset, hash) { for_each_hash_entry(he, offset, hash) {
struct qspinlock *old = NULL;
hopcnt++; hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) { if (try_cmpxchg(&he->lock, &old, lock)) {
WRITE_ONCE(he->node, node); WRITE_ONCE(he->node, node);
lockevent_pv_hop(hopcnt); lockevent_pv_hop(hopcnt);
return &he->lock; return &he->lock;
@ -294,7 +291,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
{ {
struct pv_node *pn = (struct pv_node *)node; struct pv_node *pn = (struct pv_node *)node;
struct pv_node *pp = (struct pv_node *)prev; struct pv_node *pp = (struct pv_node *)prev;
bool __maybe_unused wait_early; bool wait_early;
int loop; int loop;
for (;;) { for (;;) {
@ -360,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{ {
struct pv_node *pn = (struct pv_node *)node; struct pv_node *pn = (struct pv_node *)node;
enum vcpu_state old = vcpu_halted;
/* /*
* If the vCPU is indeed halted, advance its state to match that of * If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will * pv_wait_node(). If OTOH this fails, the vCPU was running and will
@ -377,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* subsequent writes. * subsequent writes.
*/ */
smp_mb__before_atomic(); smp_mb__before_atomic();
if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed) if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
!= vcpu_halted)
return; return;
/* /*
@ -546,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock #ifndef __pv_queued_spin_unlock
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock) __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
{ {
u8 locked; u8 locked = _Q_LOCKED_VAL;
/* /*
* We must not unlock if SLOW, because in that case we must first * We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock * unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD. * entries, which would be BAD.
*/ */
locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); if (try_cmpxchg_release(&lock->locked, &locked, 0))
if (likely(locked == _Q_LOCKED_VAL))
return; return;
__pv_queued_spin_unlock_slowpath(lock, locked); __pv_queued_spin_unlock_slowpath(lock, locked);